zDNN-1.1.2/.gitattributes
* zos-working-tree-encoding=iso8859-1
*.groovy zos-working-tree-encoding=utf-8
# Autodetect text files
* text=auto
# Definitively text files
*.c text
*.h text
*.cpp text
zDNN-1.1.2/.github/ISSUE_TEMPLATE/bug_report.md
---
name: Bug
about: File a bug/issue
title: '[BUG]'
labels: Bug, Needs Triage
assignees: ''
---
### Current Observation:
### Expected:
### Location:
### Anything else:
zDNN-1.1.2/.github/ISSUE_TEMPLATE/feature_request.md
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution or source you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
zDNN-1.1.2/.gitmodules
[submodule "tests/third_party/Unity"]
path = tests/third_party/Unity
url = https://github.com/ThrowTheSwitch/Unity.git
zDNN-1.1.2/CONTRIBUTING.md
Contributing to zDNN
==========================
License
-------
All contributions have to be submitted under the Apache 2.0 license. See also
the [LICENSE](LICENSE) file.
Developer's Certificate of Origin and Signed-off-by
---------------------------------------------------
The sign-off is a simple line at the end of the explanation for the patch,
which certifies that you wrote it or otherwise have the right to pass it on as
an open-source patch.
With the Signed-off-by line you certify the below:
```
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
If you can certify the above, just add a line stating the following at the
bottom of each of your commit messages:
```
Signed-off-by: Random Developer <random@developer.example.org>
```
Please use your real name and a valid e-mail address (no pseudonyms or anonymous
contributions).
Submitting code
---------------
The preferred way is to create GitHub pull requests for your code contributions.
Please create separate pull requests for each logical enhancement, new feature,
or fix.
GitHub workflow for contributions
---------------------------------
In the examples below we use this fictive identity:
- Name: Random Developer
- E-mail: random@developer.example.org
- GitHub ID: random-developer
### Setup GitHub and local git
1. Create a fork of this repository by clicking the `Fork` button on the top
right of the [zDNN](https://github.com/IBM/zDNN)
main page
2. Clone your forked repository to your local development system
```
$ git clone https://github.com/random-developer/zDNN.git
```
3. Configure a remote called "upstream" pointing to the official
zDNN repository on GitHub
```
$ cd zDNN
~/zDNN $ git remote add upstream https://github.com/IBM/zDNN.git
```
4. Verify your remotes
```
~/zDNN $ git remote -v
origin https://github.com/random-developer/zDNN.git (fetch)
origin https://github.com/random-developer/zDNN.git (push)
upstream https://github.com/IBM/zDNN.git (fetch)
upstream https://github.com/IBM/zDNN.git (push)
```
You now have two remotes: The "origin" remote points to your fork
and the "upstream" remote to the official zDNN repository.
5. Configure your git user name and e-mail
```
~/zDNN $ git config user.name "Random Developer"
~/zDNN $ git config user.email "random@developer.example.org"
```
### Create a pull request
1. Create and checkout a new branch for your contribution
```
~/zDNN $ git checkout -b contrib-doc-pr
```
2. Make your changes to the code
```
~/zDNN $ vim CONTRIBUTING.md
```
3. Build and test your contribution, preferably on an NNPA-enabled machine.
```
~/zDNN $ make clean all
```
4. Commit your changes
```
~/zDNN $ git add CONTRIBUTING.md
~/zDNN $ git commit -s
```
Provide a meaningful commit message including your "Signed-off-by" line to
each commit:
```
CONTRIBUTING: Outline steps to submit code
Explain in more detail how to submit zDNN contributions as GitHub
pull requests.
Signed-off-by: Random Developer <random@developer.example.org>
```
5. Push the changes to your fork of the repository
```
~/zDNN $ git push origin contrib-doc-pr
```
6. Go to the GitHub website of your zDNN fork and create a pull request
for your branch "contrib-doc-pr"
### Update a pull request during review
If there are changes requested during the review process, you have to update
your code in the pull request.
To retain the existing review comments, add commits on top of your pull request
branch. Depending on the size and number of changes, a rebase of the pull
request might be required. This will be communicated during the review.
1. Update your code with new commits
```
~/zDNN $ vi CONTRIBUTING.md
~/zDNN $ git add CONTRIBUTING.md
~/zDNN $ git commit -s -m "CONTRIBUTING: Add update PR info"
```
2. Update your pull request by pushing changes
```
~/zDNN $ git push origin contrib-doc-pr
```
### Finalize a pull request
After the review process is finished or if you are explicitly asked for it,
you have to create a clean commit series.
1. Save branch to "contrib-doc-pr.v1"
```
$ cd zDNN
~/zDNN $ git branch contrib-doc-pr.v1
```
2. Use interactive git rebase to merge commits, adjust commit messages,
and rebase onto your local main branch
```
~/zDNN $ git rebase -i main
```
An editor is started and shows the following:
```
pick 2c73b9fc CONTRIBUTING: Outline steps to submit code
pick fcfb0412 CONTRIBUTING: Add update PR info
```
To merge the update into the original commit, replace "pick fcfb0412"
with "squash fcfb0412".
```
pick 2c73b9fc CONTRIBUTING: Outline steps to submit code
squash fcfb0412 CONTRIBUTING: Add update PR info
```
Save the document and exit the editor to finish the merge. Another editor
window is presented to modify the commit message.
You could now change the commit message as follows:
```
CONTRIBUTING: Outline steps to submit code
Explain in more detail how to submit zDNN contributions as GitHub
pull requests and how to update already submitted pull requests.
Signed-off-by: Random Developer <random@developer.example.org>
```
With interactive rebasing you can also change the order of commits and
modify commit messages with "reword".
3. Use `git push` with the force option to replace the existing pull request
with your locally modified commits
```
~/zDNN $ git push --force origin contrib-doc-pr
```
### Rebase a pull request
If changes are made to the main branch in the official zDNN
repository you may be asked to rebase your branch with your contribution
onto it. This can be required to prevent any merge conflicts that might
arise when integrating your contribution.
1. Fetch all upstream changes from the official zDNN repository,
rebase your local main branch and update the main branch
on your fork
```
~/zDNN $ git fetch upstream
~/zDNN $ git checkout main
~/zDNN $ git rebase upstream/main
~/zDNN $ git push origin main
```
2. Rebase your branch with your contribution onto the main branch of
the official zDNN repository
```
~/zDNN $ git checkout contrib-doc-pr
~/zDNN $ git rebase main
```
3. Use `git push` with the force option to replace the existing pull
request with your locally modified commits
```
~/zDNN $ git push --force origin contrib-doc-pr
```
zDNN-1.1.2/LICENSE
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This product relies on various third-party components under other open
source licenses. This section summarizes those components and their
licenses. See licenses/ for text of these licenses.
MIT License
-----------
tests/third_party/Unity

zDNN-1.1.2/Makefile
# SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
.DEFAULT_GOAL := all
.PHONY: all
all: config.make
$(MAKE) all -C zdnn
$(MAKE) all -C tests
.PHONY: help
help:
@echo "Available targets:"
@egrep "^[a-z]+:" Makefile | cut -d':' -f1 | sort | xargs -n 1 echo " "
.PHONY: build
build: config.make
$(MAKE) all -C zdnn
.PHONY: test
test: config.make
$(MAKE) all -C tests
.PHONY: clean
clean: config.make
$(MAKE) clean -C tests
$(MAKE) clean -C zdnn
.PHONY: distclean
distclean: clean
rm -f config.log config.status config.make config.h
.PHONY: install
install: build
$(MAKE) install -C zdnn
config.make:
# Use this additional check to allow make invocation "make -B build" in jenkins.
ifeq ($(wildcard config.make),)
$(error "Please use configure first")
endif
zDNN-1.1.2/README.md
# zDNN API Reference
## Contacts
- Nicholas Marion ()
- Andreas Krebbel ()
- Steven Jones ()
## Version
1.1.2
## Table of Contents
1. [Overview](#overview)
2. [Environment](#environment)
3. [Building zDNN](#building-and-installing-zdnn)
4. [Common Data Types and Structs](#common-types-and-structs)
- [Version Information](#common-version-info)
- [zDNN zTensor](#common-ztensor)
- [General zTensor Requirements](#gen-zten-reqs)
- [Concatenated zTensor Requirements](#concat-zten-reqs)
- [Quantized zTensor Requirements](#quan-zten-reqs)
- [zDNN Tensor Descriptors](#common-descriptors)
- [zDNN Data Layouts](#common-layouts)
- [zDNN Data Formats](#common-formats)
- [zDNN Data Types](#common-types)
- [zDNN Quantized Transform Types](#quantized-transform-types)
- [zDNN Statuses](#common-statuses)
5. [Runtime Environment Variables](#env-vars)
6. [Validating the Runtime Environment](#runtime-val)
7. [API Reference](#api-reference)
- [Support Functions](#support-functions)
- [Data Transformation](#data-transformation)
- [Operations](#operations)
- [Element-wise](#elwise-ops)
- [Activation](#act-ops)
- [Normalization](#norm-ops)
- [Matmul with Operation](#zdnn_matmul_op)
- [Matmul Broadcast with Operation](#zdnn_matmul_bcast_op)
- [Matmul Transpose with Operation](#zdnn_matmul_transpose_op)
- [Quantized Matmul Operation](#zdnn_quantized_matmul_op)
- [LSTM](#zdnn_lstm)
- [GRU](#zdnn_gru)
- [Average Pool 2D](#zdnn_avgpool2d)
- [Max Pool 2D](#zdnn_maxpool2d)
- [Convolution 2D](#zdnn_conv2d)
- [Convenience Functions](#convenience-functions)
8. [Usage Examples](#usage-examples)
## Overview
**Deep Learning Library** - the deep learning library support (zDNN) is the SW
enablement technology provided by IBM to meet the following requirements:
- Specialized-function-assist instructions are intended to provide performance
improvements for specific operations used in software libraries, utilities,
and operating system (OS) services. The facilities and instructions described
as specialized-function-assist instructions may be replaced or removed in the
future. As such, the IBM recommendation for these instructions is that a
software library or operating system function be used instead of directly
accessing the instructions. This is the function provided by zDNN.
- zAIU has very complex data layout requirements; these requirements arrange the
tensor to enhance the performance characteristics of the operations. zDNN will
format the tensor appropriately on behalf of the caller, and it will do so
using an optimized approach.
- For deep learning operations, zAIU requires the use of internal data types:
- DLFLOAT16, a 2-byte data type supported in Telum I, which optimizes training
and inference while minimizing the loss of accuracy at inference time
(versus standard 4-byte formats),
- INT8, a 1-byte data type supported with Telum II, which allows tensor
quantization features.
The zDNN library provides a set of APIs that an exploiter will utilize to drive
the desired request. zDNN will be available on both z/OS and Linux on Z; the
inclusion of Linux on Z provides particular benefit, as it will allow us to
enable acceleration in frameworks for z/OS via z/OS Container Extensions (zCX).
---
## Environment
z/OS:
- Problem state
- AMODE64
- XPLINK
### Alignment requirements
#### zAIU Op Limits
_This implies a zDNN limitation as well at this point._
- For all ops:
- Number of elements in any dimension must not exceed the value returned by
`zdnn_get_max_for_dim(uint8_t dimension)`
- Total number of bytes required for storing a transformed tensor must not
exceed the value returned by `zdnn_get_nnpa_max_tensor_size()`
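As a rough illustration, both limits above can be checked programmatically before transforming a tensor. The sketch below is not part of the zDNN API; the helper name is illustrative, the shape is assumed to be given in transformed-descriptor order (dim4 outermost through dim1 innermost), and the transformed byte size is assumed to have been computed already, e.g. via `zdnn_getsize_ztensor`.

```C
// Minimal sketch: reject shapes that exceed the documented zAIU limits
// before attempting a transformation.
#include <stdbool.h>
#include <stdint.h>
#include "zdnn.h"

// dims[0] = dim4 (outermost) ... dims[3] = dim1 (innermost)
static bool shape_within_limits(const uint32_t dims[4],
                                uint64_t transformed_bytes) {
  for (uint8_t d = 1; d <= 4; d++) {
    // per-dimension index limit reported by the zAIU
    if (dims[4 - d] > zdnn_get_max_for_dim(d)) {
      return false;
    }
  }
  // total bytes for the transformed tensor must also stay within bounds
  return transformed_bytes <= zdnn_get_nnpa_max_tensor_size();
}
```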
### Application interfaces for zAIU Enterprise Neural Network Inference
#### zDNN General
The zDNN deep learning library provides the standard IBM Z software interface to
the zAIU. This IBM-provided C library provides a set of functions that handle
the data transformation requirements of the zAIU and provide wrapper functions
for the NNPA instruction primitives.
The zDNN functions use the following criteria to determine if zAIU can be used
to accelerate a deep learning primitive:
- Neural Network Processing Assist (NNPA) facility indicator in the system STFLE
output.
- Output of the NNPA-QAF (Query Available Functions) request.
#### Using zDNN
To use the IBM-provided zDNN C library for the NNPA instruction, follow these
steps:
1. Link or re-link applications to use the IBM-provided zDNN. The IBM-provided
zDNN is a library file in the z/OS UNIX System Services file system and can
be statically or dynamically linked into your applications. The paths for the
zDNN archive file and the zDNN header files are:
**z/OS (LE required):** Path for 64-bit dynamic library files:
- `/lib/libzdnn.so`
- `/lib/libzdnn.x`
Path for the zDNN header files:
- `/usr/include/`
The XL C/C++ compiler and the z/OS Language Environment provide various
environment variables to control processing, in addition to the variables
provided by the zDNN library itself.
1. Use the environment variable `_CEE_RUNOPTS` to specify invocation Language
Environment runtime options. For more information about using the environment
variable `_CEE_RUNOPTS` and other C and LE variables, see z/OS XL C/C++
Programming Guide.
2. For environment variables accepted by the zDNN library, see
[Runtime Environment Variables](#env-vars).
**Linux on Z:**
On Linux on Z we expect to ship source as well as a package-installable library and
header. The library installation will conform to the standards of the packaging
method chosen.
---
## Building and Installing zDNN
### Clone the Repository and Submodules
```
git clone --recurse-submodules git@github.com:IBM/zDNN.git
```
### Create configure script
To create the configure script, run:
```
autoreconf .
```
### Configure Build
Prepare the build and install environment and check for necessary dependencies
using the `./configure` script.
```
./configure [OPTION]... [VAR=VALUE]...
```
#### Installation Options
- `--prefix=PREFIX`
- Install architecture-independent files in PREFIX. Default location is
`/usr/local`
- `--exec-prefix=EPREFIX`
- Install architecture-dependent files in EPREFIX. Default location is
`PREFIX`
_To explore all available configuration options and features, use `-h`_
### Build Library
Compile zDNN library using:
```
make build
```
### Run Tests
To run tests:
```
make test
```
#### Unity Requirement
_Please note that the Unity test framework source code is required to run unit
tests. If you did not clone submodules along with initial zDNN clone, please
perform the following steps to set up Unity prior to issuing `make test`:_
1. Clone the source code from the
[Throw The Switch - Unity](https://github.com/ThrowTheSwitch/Unity)
repository.
2. Set the `UNITY_ROOT` environment variable to the folder containing the Unity
source code.
#### Python Package Requirements
_Please note that `junit_xml` and `pyparsing` are required python packages in
order to properly parse and format Unity test results. Follow standard python
package installation practices to meet requirements._
### Install
Install zDNN library:
```
sudo make install
```
### Reference Commands
Configure help:
```
./configure -h
```
Make help:
```
make help
```
### Prerequisite Tools
Compilers:
- `GCC: GNU Compiler Collection (gcc)`
or
- `IBM XL C/C++: (xlc)`
Build Tools and Dependencies:
- `Autoconf`
- `Make`
- `Unity`
- `Python Packages` _For formatting test results_
- junit_xml
- pyparsing
---
## Common Types and Structs
Include Files: `zdnn.h`
### Version Information
[Back to Table of Contents](#TOC)
```C
#define ZDNN_VERSION "1.1.2"
#define ZDNN_VERNUM 0x010102 // 0x[major][minor][patch]
#define ZDNN_VER_MAJOR 1
#define ZDNN_VER_MINOR 1
#define ZDNN_VER_PATCH 2
```
1. zDNN major version (_ZDNN_VER_MAJOR_) will be incremented if any backwards
incompatible changes are introduced to the API. It may also include minor and
patch level changes. Patch and minor version will be reset to 0 when major
version is incremented.
2. zDNN minor version (_ZDNN_VER_MINOR_) will be incremented if new, backwards
compatible functionalities are introduced to the API or if any API
functionalities are marked as deprecated. It may also include patch level
changes. Patch version will be reset to 0 when minor version is incremented.
3. zDNN patch version (_ZDNN_VER_PATCH_) will be incremented if only backwards
compatible bug fixes are introduced. A bug fix being defined as an internal
change that fixes incorrect behavior.
Functions for checking version incompatibility with the zDNN load library are
provided and described in the [Support Functions](#support-functions) section.
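For illustration only, the sketch below compares the version the application was built against with the version of the library actually loaded. It relies only on the macros above and on `zdnn_get_library_version` / `zdnn_get_library_version_str` described under [Support Functions](#support-functions); the function name is illustrative.

```C
// Minimal sketch: warn when the compile-time major version is newer than the
// runtime library's major version (newer APIs may be missing at runtime).
#include <stdint.h>
#include <stdio.h>
#include "zdnn.h"

int check_zdnn_build_vs_runtime(void) {
  uint32_t runtime_ver = zdnn_get_library_version(); // 0x00[major][minor][patch]
  printf("built against zDNN %s, running with %s\n", ZDNN_VERSION,
         zdnn_get_library_version_str());
  if ((uint32_t)(ZDNN_VERNUM >> 16) > (runtime_ver >> 16)) {
    return -1; // application may reference APIs the loaded library lacks
  }
  return 0;
}
```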
### zDNN zTensor
[Back to Table of Contents](#TOC)
```C
typedef struct zdnn_ztensor {
zdnn_tensor_desc
*pre_transformed_desc; // tensor's shape information before transformation
zdnn_tensor_desc *transformed_desc; // transformed tensor's shape information
uint64_t buffer_size; // tensor size in bytes
void *buffer; // pointer to the tensor in memory
bool is_transformed; // indicator if data in buffer has been transformed
char reserved[3]; // not currently used, should contain zeros.
float rec_scale; // the scale factor for quantization, stored as reciprocal
float offset; // the offset for quantization
char reserved2[20]; // not currently used, should contain zeros.
} zdnn_ztensor;
```
#### General zTensor Requirements
[Back to Table of Contents](#TOC)
- `buffer` requirements:
- Calling [zdnn_init_ztensor_with_malloc](#zdnn_init_ztensor_with_malloc)
automatically allocates and sets a valid `buffer` for a tensor.
- `buffer` field must point to allocated storage of sufficient size to contain
the transformed tensor data described by its `transformed_desc` field.
- Calling [zdnn_getsize_ztensor](#zdnn_getsize_ztensor) with the tensor's
`transformed_desc` returns the required size.
- Start of `buffer` field must be 4k aligned.
- `reserved` should contain zeros, otherwise the program may not operate
compatibly in the future.
- Calling [zdnn_init_ztensor](#zdnn_init_ztensor) or
[zdnn_init_ztensor_with_malloc](#zdnn_init_ztensor_with_malloc) will set
`reserved` to zeros.
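The sketch below shows one way to satisfy these requirements by letting the library allocate the buffer. The descriptor helpers are the ones documented under [Support Functions](#support-functions); the helper name and shape values here are purely illustrative.

```C
// Minimal sketch: build descriptors for a (1, 32, 32, 3) NHWC tensor of FP32
// data and let zdnn_init_ztensor_with_malloc allocate a 4K-aligned buffer of
// the required size.
#include "zdnn.h"

zdnn_status make_nhwc_ztensor(zdnn_ztensor *zt) {
  zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
  zdnn_status status;

  zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc,
                                 1, 32, 32, 3); // N, H, W, C

  status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
  if (status != ZDNN_OK) {
    return status;
  }

  // allocates zt->buffer (zt->buffer_size bytes) and zeroes reserved fields
  return zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, zt);
}
```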
#### Concatenated zTensor Requirements
[Back to Table of Contents](#TOC)
- For use with weights/biases/hidden-weights/hidden-biases RNN-gates tensors.
- You must use
[zdnn_generate_transformed_desc_concatenated](#zdnn_generate_transformed_desc_concatenated)
with the appropriate concatenation info
- Do not use `zdnn_generate_transformed_desc` with concatenated tensors
- The pre-transformed shape dimensions should not include the concatenation.
- Thus, the pre-transformed shape should be that of a single gate, not the
shape of the combined gates
- Afterward transform with [zdnn_transform_ztensor](#zdnn_transform_ztensor) as
normal
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Quantized zTensor Requirements
[Back to Table of Contents](#TOC)
- Supported `transform_desc` and `pre_transformed_desc` types for
[zdnn_transform_quantized_ztensor](#zdnn_transform_quantized_ztensor) and
[zdnn_generate_quantized_transformed_desc](#zdnn_generate_quantized_transformed_desc):
- `ZDNN_FORMAT_4DFEATURE` format:
- ZDNN_DLFLOAT16
- FP16, FP32, BFLOAT
- ZDNN_BINARY_INT8
- INT8, FP16, FP32, BFLOAT
- `ZDNN_FORMAT_4DWEIGHTS` format:
- ZDNN_BINARY_INT8
- INT8
### zDNN Tensor Descriptors
[Back to Table of Contents](#TOC)
```C
typedef struct zdnn_tensor_desc {
zdnn_data_layouts layout; // data layout
zdnn_data_formats format; // internal use only
zdnn_data_types type; // data type
uint32_t dim4; // number of elements in outermost dimension
uint32_t dim3; // ... outer dimension
uint32_t dim2; // ... inner dimension
uint32_t dim1; // number of elements in innermost dimension
} zdnn_tensor_desc;
```
#### Programming Notes
- Helper methods
[zdnn_init_pre_transformed_desc](#zdnn_init_pre_transformed_desc) and
[zdnn_generate_transformed_desc](#zdnn_generate_transformed_desc) or
[zdnn_generate_transformed_desc_concatenated](#zdnn_generate_transformed_desc_concatenated)
will set the correct dims based on the layout and format.
- The [layout](#common-layouts) of the tensor descriptor affects the expected
order of the dims. For example:
- For tensors with less than 4 dimensions, unspecified dims:
- In the [pre_transformed_desc](#common-ztensor) are ignored. For example a
[ZDNN_3D](#common-layouts) expects values in dim4, dim3, and dim2.
- In the [transformed_desc](#common-ztensor) "unused" dims must be 1.
- A [ZDNN_NHWC](#common-layouts) expects dims such that dim4 = N, dim3 = H,
dim2 = W, dim1 = C
- A [ZDNN_NCHW](#common-layouts) expects dims such that dim4 = N, dim3 = C,
dim2 = H, dim1 = W
- A [ZDNN_HWCK](#common-layouts) expects dims such that dim4 = H, dim3 = W,
dim2 = C, dim1 = K
- The [format](#common-formats) changes the expected dims order for
[ZDNN_4D](#common-layouts) tensors layouts
- [ZDNN_FORMAT_4DFEATURE](#common-formats) expects dims such that dim4 = N,
dim3 = H, dim2 = W, dim1 = C
- [ZDNN_FORMAT_4DKERNEL](#common-formats) expects dims such that dim4 = H,
dim3 = W, dim2 = C, dim1 = K
### zDNN Data Layouts
[Back to Table of Contents](#TOC)
The following are layouts for zDNN ztensor descriptors. These indicate the
number and order of dimensions to expect for the ztensor data.
```C
typedef enum zdnn_data_layouts {
ZDNN_1D, // 1d tensor
ZDNN_2D, // 2d tensor
ZDNN_2DS, // represents special 2D tensors required by LSTM/GRU
ZDNN_3D, // 3d tensor
ZDNN_3DS, // represents special 3D tensors required by
// LSTM/GRU/Softmax/Matmul
ZDNN_ZRH, // represents (update, reset, hidden) used by GRU
ZDNN_4D, // 4d tensor
ZDNN_4DS, // represents special 4D tensors required by LSTM/GRU output
ZDNN_NHWC, // 4d feature tensor in NHWC
ZDNN_NCHW, // 4d feature tensor in NCHW
ZDNN_FICO, // represents (forget, input, cell, output) used by LSTM
ZDNN_HWCK, // 4d kernel CNN tensor
ZDNN_BIDIR_ZRH, // ZRH variant to work with bidirectional LSTM/GRU output
ZDNN_BIDIR_FICO // FICO variant to work with bidirectional LSTM/GRU output
} zdnn_data_layouts;
```
Some layouts also indicate special re-arrangement of the data during ztensor
transformation.
- `ZDNN_2DS` - The outermost dimension of the original shape is promoted to dim4
during transformation. For example, a shape of (a, b) becomes [a, 1, 1, b]
(dim4, dim3, dim2, dim1) in the `transformed_desc`
- `ZDNN_3DS` - The outermost dimension of the original shape is promoted to dim4
during transformation. For example, a shape of (a, b, c) becomes [a, 1, b, c]
(dim4, dim3, dim2, dim1) in the `transformed_desc`
- `ZDNN_4DS` - Arrangement for RNN output tensor
The following are set automatically in `transformed_desc` based on `info` when
calling `zdnn_generate_transformed_desc_concatenated()`:
- `ZDNN_ZRH/FICO` - During transformation, the RNN input gates data are
concatenated on the innermost dimension. Supported with
`pre_transformed_layout` of `ZDNN_2DS` or `ZDNN_3DS`.
- `ZDNN_BIDIR_ZRH/FICO` - Similar to `ZDNN_ZRH/FICO`, used when:
1. transforming RNN input weight gate data, and
2. the input tensor for the current RNN layer is a bidirectional RNN output
from a previous RNN layer
### zDNN Data Formats
[Back to Table of Contents](#TOC)
```C
typedef enum zdnn_data_formats {
ZDNN_FORMAT_4DFEATURE, // tensor in zAIU data layout format 0
ZDNN_FORMAT_4DKERNEL, // tensor in zAIU data layout format 1
ZDNN_FORMAT_4DWEIGHTS, // tensor in zAIU data layout format 2
ZDNN_FORMAT_4DGENERIC, // tensor in zAIU data layout format 31
} zdnn_data_formats;
```
### zDNN Data Types
[Back to Table of Contents](#TOC)
```C
typedef enum zdnn_data_types {
ZDNN_DLFLOAT16, // 16-bit deep learning format
ZDNN_BINARY_FP32, // 32-bit binary-floating-point format
ZDNN_BINARY_INT8, // 8-bit signed or unsigned binary integer
ZDNN_BINARY_INT32, // 32-bit signed or unsigned binary integer
INT8, // 8-bit signed or unsigned binary integer format
INT32, // 32-bit signed or unsigned binary integer format
BFLOAT, // Brain floating point format
FP16, // 16-bit IEEE-754 floating point format
FP32, // 32-bit IEEE-754 floating point format
} zdnn_data_types;
```
### zDNN Quantized Transform Types
[Back to Table of Contents](#TOC)
```C
typedef enum zdnn_quantized_transform_types {
QUANTIZED_DLFLOAT16 = 0, // quantized dlfloat16
QUANTIZED_INT8 = 1, // quantized int8
QUANTIZED_WEIGHTS_INT8 = 2 // quantized weights
} zdnn_quantized_transform_types;
```
### zDNN Statuses
[Back to Table of Contents](#TOC)
| Mnemonic Constant | Value | Meaning |
| -------------------------------- | ---------- | ------------------------------ |
| ZDNN_OK | 0x00000000 | Success. |
#### Warning Statuses
| Mnemonic Constant | Value | Meaning |
| -------------------------------- | ---------- | ------------------------------ |
| ZDNN_ELEMENT_RANGE_VIOLATION | 0x00020001 | zAIU operation resulted in data that was out of the normal range. |
_Note: ZDNN_ELEMENT_RANGE_VIOLATION indicates a **range violation** occurred for
the zAIU operation based on the data in the tensors. This usually indicates an
overflow of an NNPA internal data type, but can also be associated with
operation specific errors, such as "divide by zero". See the "z/Architecture
Principles of Operation" for information about range violation on the operation
that encountered the violation._
#### General Failing Statuses
| Mnemonic Constant | Value | Meaning |
| -------------------------------- | ---------- | ------------------------------ |
| ZDNN_INVALID_SHAPE\* | 0x00040001 | Invalid shape information in one (or more) of the input/output tensor(s). |
| ZDNN_INVALID_LAYOUT | 0x00040002 | Invalid layout information in one (or more) of the input/output tensor(s). |
| ZDNN_INVALID_TYPE\* | 0x00040003 | Invalid type information in one (or more) of the input/output tensor(s). |
| ZDNN_INVALID_FORMAT\* | 0x00040004 | Invalid format information in one (or more) of the input/output tensor(s). |
| ZDNN_INVALID_DIRECTION | 0x00040005 | Invalid RNN direction. |
| ZDNN_INVALID_CONCAT_INFO | 0x00040006 | Invalid concatenation info. |
| ZDNN_INVALID_STRIDE_PADDING\* | 0x00040007 | Invalid padding type parameter for current strides. |
| ZDNN_INVALID_STRIDES\* | 0x00040008 | Invalid stride height or width parameter. |
| ZDNN_MISALIGNED_PARMBLOCK\* | 0x00040009 | NNPA parameter block is not on double word boundary. |
| ZDNN_INVALID_CLIPPING_VALUE | 0x0004000A | Invalid clipping for the specified operation. |
| ZDNN_INVALID_ADJUSTMENT_FACTOR | 0x0004000B | Invalid adjustment for the specified operation. |
| ZDNN_INVALID_EPSILON | 0x0004000C | Invalid epsilon for the specified operation. |
| ZDNN_INVALID_TRANSFORM_TYPE | 0x0004000D | Invalid transformation type. |
| ZDNN_INVALID_BETA | 0x0004000E | Invalid beta value for the specified operation. |
| ZDNN_INVALID_GAMMA | 0x0004000F | Invalid gamma value for the specified operation. |
| ZDNN_INVALID_BESSEL_CORRECTION | 0x00040010 | Invalid bessel correction value for the specified operation. |
| ZDNN_INVALID_SCALE | 0x00040011 | Invalid scale value for the specified operation. |
| ZDNN_INVALID_OFFSET | 0x00040012 | Invalid offset value for the specified operation. |
| ZDNN_ALLOCATION_FAILURE | 0x00100001 | Can not allocate storage. |
| ZDNN_INVALID_BUFFER | 0x00100002 | Buffer address is NULL or not on 4K-byte boundary or insufficient buffer size. |
| ZDNN_CONVERT_FAILURE | 0x00100003 | Floating point data conversion failure. |
| ZDNN_INVALID_STATE | 0x00100004 | Invalid zTensor state. |
| ZDNN_UNSUPPORTED_AIU_EXCEPTION | 0x00100005 | zAIU operation returned an unexpected exception. |
_Note: \*In certain scenarios, these statuses are returned only if
[ZDNN_ENABLE_PRECHECK](#env-vars) is enabled. When not enabled, these scenarios
will lead to abnormal program termination._
#### Hardware Statuses
The following statuses indicate issues returned from the hardware.
| Mnemonic Constant | Value | Meaning |
| -------------------------------- | ---------- | ------------------------------ |
| ZDNN_UNSUPPORTED_PARMBLOCK | 0x000C0001 | NNPA parameter block format is not supported by the model. |
| ZDNN_UNAVAILABLE_FUNCTION | 0x000C0002 | Specified NNPA function is not defined or installed on the machine. |
| ZDNN_UNSUPPORTED_FORMAT | 0x000C0010 | Specified tensor data layout format is not supported. |
| ZDNN_UNSUPPORTED_TYPE | 0x000C0011 | Specified tensor data type is not supported. |
| ZDNN_EXCEEDS_MDIS | 0x000C0012 | Tensor dimension exceeds maximum dimension index size (MDIS). |
| ZDNN_EXCEEDS_MTS | 0x000C0013 | Total number of bytes in tensor exceeds maximum tensor size. (MTS). |
| ZDNN_MISALIGNED_TENSOR | 0x000C0014 | Tensor address is not on 4K-byte boundary. |
| ZDNN_MISALIGNED_SAVEAREA | 0x000C0015 | Function specific save area address is not on 4K-byte boundary. |
The meaning of the following hardware statuses vary based on operation. See the
operation that returned the status for the specific meaning.
| Mnemonic Constant | Value | Meaning |
| -------------------------------- | ---------- | ------------------------------ |
| ZDNN_FUNC_RC_F000 | 0x000CF000 | Function specific response code (F000). |
| ZDNN_FUNC_RC_F001 | 0x000CF001 | Function specific response code (F001). |
| ZDNN_FUNC_RC_F002 | 0x000CF002 | Function specific response code (F002). |
| ZDNN_FUNC_RC_F003 | 0x000CF003 | Function specific response code (F003). |
| ZDNN_FUNC_RC_F004 | 0x000CF004 | Function specific response code (F004). |
| ZDNN_FUNC_RC_F005 | 0x000CF005 | Function specific response code (F005). |
| ZDNN_FUNC_RC_F006 | 0x000CF006 | Function specific response code (F006). |
| ZDNN_FUNC_RC_F007 | 0x000CF007 | Function specific response code (F007). |
| ZDNN_FUNC_RC_F008 | 0x000CF008 | Function specific response code (F008). |
| ZDNN_FUNC_RC_F009 | 0x000CF009 | Function specific response code (F009). |
---
## Runtime Environment Variables
[Back to Table of Contents](#TOC)
- `ZDNN_ENABLE_PRECHECK`: true/false
- If set to `true`, tensor integrity prechecks are run before issuing NNPA
operations.
- Enabling precheck may impact performance.
- Enable to debug issues which cause hardware exceptions that otherwise would
result in abnormal program termination.
- `ZDNN_STATUS_DIAG`: nnnnnnnn (decimal) or 0xnnnnnnnn (hexadecimal)
- Prints or produces diagnostic information whenever zDNN status code is equal
to the specified value. Only one status value can be specified.
_The following are only available when the zDNN library was built with
`ZDNN_CONFIG_DEBUG` enabled._
- `ZDNN_LOGLEVEL`: off/fatal/error/warn/info/debug/trace
- Sets logging facility's output level
- `ZDNN_LOGMODULE`: module name(s)
- Produces log output only when the issuer's module name is in the list. You
may specify multiple module names by separating them with either commas or
spaces.
### Programming Notes
- Environment variables settings are checked during initial library load by
[zdnn_init](#zdnn_init).
- To change environment variable settings afterward, [zdnn_init](#zdnn_init)
must be called again manually.
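A minimal sketch of this re-initialization behavior, assuming a POSIX `setenv` is available; the function name is illustrative:

```C
// Minimal sketch: environment variables are only read by zdnn_init(), so a
// change made after library load requires another explicit zdnn_init() call.
#include <stdlib.h>
#include "zdnn.h"

void enable_precheck_at_runtime(void) {
  setenv("ZDNN_ENABLE_PRECHECK", "true", 1); // overwrite any existing value
  zdnn_init();                               // re-read environment settings
}
```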
## Validating the environment at runtime
### Programming Notes
- Most API calls require a minimum zDNN library and hardware for the API to
function. There are three zDNN APIs for validation of the zDNN runtime
environment:
- Validating the zDNN Library version:
- This is the version of the libzdnn package installed on the host or
embedded in the runtime application.
- The zDNN library version is independent of the hardware available on the
current system.
- zDNN APIs introduced in newer versions of the zDNN library will not exist
in older versions of the library. Attempting to call them will result in
application crashes.
- The zDNN library version is returned by
[zdnn_get_library_version](#zdnn_get_library_version).
- Validating the zDNN API version:
- This is the version of zDNN APIs that are compatible on the current system
and is separate of the zDNN library version.
- Calling zDNN APIs while running on a system which does not support that
zDNN API version will return a [hardware status](#hw-statuses) instead of
[ZDNN_OK](#common-statuses).
- The zDNN API version available is returned by
[zdnn_get_max_runnable_version](#zdnn_get_max_runnable_version) and is
reflected in the return value of
[zdnn_is_version_runnable](#zdnn_is_version_runnable).
- zDNN API 1.0.x indicates the API requires Telum I or greater.
- zDNN API 1.1.x indicates the API requires Telum II or greater.
- Validating NNPA availability:
- This indicates if the current system has zAIU hardware present and
enabled.
- It is possible to be on a system with zAIU hardware but the feature is
unavailable, such as z/VM when there is a mix of hardware levels.
- This is returned by [zdnn_is_nnpa_installed](#zdnn_is_nnpa_installed)
- Examples:
- Given a Telum I system with zDNN 1.1.0 installed:
- [zdnn_get_library_version](#zdnn_get_library_version) will return
`0x00010100` indicating zDNN library 1.1.0 is installed.
- [zdnn_is_nnpa_installed](#zdnn_is_nnpa_installed) will return `true`
(unless the zAIU feature is disabled for the system).
- [zdnn_get_max_runnable_version](#zdnn_get_max_runnable_version) will
return `0x000100FF` indicating zDNN APIs 1.0.x and below are available for
use on the system.
- Checking [zdnn_is_version_runnable(0x00010100)](#zdnn_is_version_runnable)
(1.1.0) will return `false` as only zDNN APIs 1.0.x and below are
available for use on the system.
- Checking [zdnn_is_version_runnable(0x00010000)](#zdnn_is_version_runnable)
(1.0.0) will return `true` as zDNN APIs 1.0.x and below are available for
use on the system.
- Given a Telum II system with zDNN 1.1.0 installed:
- [zdnn_get_library_version](#zdnn_get_library_version) will return
`0x00010100` indicating zDNN library 1.1.0 is installed.
- [zdnn_is_nnpa_installed](#zdnn_is_nnpa_installed) will return `true`
(unless the zAIU feature is disabled for the system).
- [zdnn_get_max_runnable_version](#zdnn_get_max_runnable_version) will
return `0x000101FF` indicating zDNN APIs 1.1.x and below are available for
use on the system.
- Checking [zdnn_is_version_runnable(0x00010100)](#zdnn_is_version_runnable)
(1.1.0) will return `true` as zDNN APIs 1.1.x and below are available for
use on the system.
- Checking [zdnn_is_version_runnable(0x00010000)](#zdnn_is_version_runnable)
(1.0.0) will return `true` as zDNN APIs 1.1.x and below are available for
use on the system.
- Given a Telum II system with zDNN 1.0.0 installed:
- [zdnn_get_library_version](#zdnn_get_library_version) will return
`0x00010000` indicating zDNN library 1.0.0 is installed.
- [zdnn_is_nnpa_installed](#zdnn_is_nnpa_installed) will return `true`
(unless the zAIU feature is disabled for the system).
- [zdnn_get_max_runnable_version](#zdnn_get_max_runnable_version) will
return `0x000100FF` indicating zDNN APIs 1.0.x and below are available for
use on the system.
- Checking [zdnn_is_version_runnable(0x00010100)](#zdnn_is_version_runnable)
(1.1.0) will return `false` as only zDNN APIs 1.0.x and below are
available for use on the system.
- Checking [zdnn_is_version_runnable(0x00010000)](#zdnn_is_version_runnable)
(1.0.0) will return `true` as zDNN APIs 1.0.x and below are available for
use on the system.
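The examples above can be condensed into a small runtime gate. The sketch below uses only the three validation APIs discussed here; the function name and the `ZDNN_VERNUM_1_1_0` constant are defined locally for illustration and are not provided by `zdnn.h`.

```C
// Minimal sketch: decide whether zDNN 1.1.x APIs may be called on this system.
#include <stdbool.h>
#include <stdio.h>
#include "zdnn.h"

#define ZDNN_VERNUM_1_1_0 0x00010100u // illustrative constant for 1.1.0

bool can_use_zdnn_1_1_apis(void) {
  if (!zdnn_is_nnpa_installed()) {
    return false; // no zAIU hardware available or enabled on this system
  }
  printf("library 0x%08x, max runnable API 0x%08x\n",
         (unsigned)zdnn_get_library_version(),
         (unsigned)zdnn_get_max_runnable_version());
  return zdnn_is_version_runnable(ZDNN_VERNUM_1_1_0);
}
```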
---
## API Reference
[Back to Table of Contents](#TOC)
- [Support Functions](#support-functions)
- [Data Transformation](#data-transformation)
- [Operations](#operations)
- [Convenience Functions](#convenience-functions)
---
## Support Functions
[Back to Table of Contents](#TOC)
- [Initialization](#zdnn_init)
- [Get the smallest of the maximum index sizes across all dimensions](#zdnn_get_nnpa_max_dim_idx_size)
- [Get max index for a given dimension](#zdnn_get_max_for_dim)
- [Get Size](#zdnn_getsize_ztensor)
- [Get Range](#zdnn_getrange_ztensor)
- [Get maximum limit for a given data type](#zdnn_get_max_limit)
- [Get minimum limit for a given data type](#zdnn_get_min_limit)
- [Initialize pre-transformed tensor descriptor](#zdnn_init_pre_transformed_desc)
- [Generate transformed tensor descriptor](#zdnn_generate_transformed_desc)
- [Generate quantized transformed tensor descriptor](#zdnn_generate_quantized_transformed_desc)
- [Generate concatenated transformed tensor descriptor](#zdnn_generate_transformed_desc_concatenated)
- [Initialize zTensor](#zdnn_init_ztensor)
- [Initialize zTensor with memory allocate](#zdnn_init_ztensor_with_malloc)
- [Initialize quantized zTensor](#zdnn_init_quantized_ztensor)
- [Initialize quantized zTensor with memory allocate](#zdnn_init_quantized_ztensor_with_malloc)
- [Reset zTensor](#zdnn_reset_ztensor)
- [Allocate memory for zTensor](#zdnn_allochelper_ztensor)
- [De-allocate memory for zTensor](#zdnn_free_ztensor_buffer)
- [Retrieve status message of the status code](#zdnn_get_status_message)
- [Reshape zTensor](#zdnn_reshape_ztensor)
- [Check if version is runnable](#zdnn_is_version_runnable)
- [Get maximum runnable version](#zdnn_get_max_runnable_version)
---
### zdnn_init
#### Description
Initialize the zDNN library. This sends an NNPA_QAF to query the NNPA and loads
the current environment variable settings.
This needs to be invoked at least once if the zDNN library is statically linked. It
is invoked automatically when the zDNN library is dynamically loaded.
#### Format
```C
void zdnn_init();
```
#### Parameters
None
#### Returns
None
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_nnpa_max_dim_idx_size
#### Description
Retrieve the smallest of the maximum dimension index size values across all
dimensions currently supported by the zAIU from zDNN's internal memory.
#### Format
```C
uint32_t zdnn_get_nnpa_max_dim_idx_size();
```
#### Parameters
None
#### Returns
Maximum dimension index size supported by the zAIU across all dimensions
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_max_for_dim
#### Description
Retrieve the maximum dimension index size value currently supported by the zAIU
for a given dimension from zDNN's internal memory. These limits relate to
ztensor's transformed descriptor values. Special care is required when using
layouts with special re-arrangements of data. See
[zDNN Data Layouts](#common-layouts) for more details.
#### Format
```C
uint32_t zdnn_get_max_for_dim(uint8_t dimension);
```
#### Parameters
- `uint8_t dimension`
- dimension to get maximum index size for
#### Returns
Maximum dimension index size supported by the zAIU for a given dimension
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_nnpa_max_tensor_size
#### Description
Retrieve the maximum tensor size value (number of bytes required for storing a
transformed tensor) currently supported by the zAIU from zDNN's internal memory.
#### Format
```C
uint64_t zdnn_get_nnpa_max_tensor_size();
```
#### Parameters
None
#### Returns
Maximum tensor size supported by the zAIU
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_is_nnpa_installed
#### Description
Interrogates the hardware to determine if the NNPA and associated instructions
are installed.
Use this function during application initialization to determine whether the
zAIU hardware is available.
#### Format
```C
bool zdnn_is_nnpa_installed();
```
#### Parameters
- None.
#### Returns
`true` if NNPA and associated instructions are installed, `false` otherwise.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_is_nnpa_function_installed
#### Description
Query, from zDNN internal memory, if requested NNPA functions are available.
#### Format
```C
bool zdnn_is_nnpa_function_installed(int count, ...);
```
#### Parameters
- `int count`
- number of NNPA functions to check
- `... (additional arguments)`
- Function names separated by commas, e.g., _NNPA_MUL, NNPA_MIN_
```
NNPA_QAF
NNPA_ADD
NNPA_SUB
NNPA_MUL
NNPA_DIV
NNPA_MIN
NNPA_MAX
NNPA_LOG
NNPA_EXP
NNPA_RELU
NNPA_TANH
NNPA_SIGMOID
NNPA_SOFTMAX
NNPA_BATCHNORMALIZATION
NNPA_MAXPOOL2D
NNPA_AVGPOOL2D
NNPA_LSTMACT
NNPA_GRUACT
NNPA_CONVOLUTION
NNPA_MATMUL_OP
NNPA_MATMUL_OP_BCAST23
NNPA_MATMUL_OP_BCAST1
NNPA_TRANSFORM
```
#### Returns
`true` if all queried functions are installed or if `count` is zero, `false`
otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
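A brief, hedged usage sketch with two of the function names listed above; the wrapper name is illustrative:

```C
// Minimal sketch: check that both NNPA functions needed by a matmul-based
// model are installed before building tensors for them.
#include <stdbool.h>
#include "zdnn.h"

bool matmul_support_available(void) {
  return zdnn_is_nnpa_function_installed(2, NNPA_MATMUL_OP, NNPA_TRANSFORM);
}
```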
---
### zdnn_is_nnpa_parmblk_fmt_installed
#### Description
Query, from zDNN internal memory, if requested parameter block formats are
installed.
#### Format
```C
bool zdnn_is_nnpa_parmblk_fmt_installed(int count, ...);
```
#### Parameters
- `int count`
- number of NNPA parameter block formats to check
- `... (additional arguments)`
- NNPA parameter block formats separated by commas
```
NNPA_PARMBLKFORMAT_0
NNPA_PARMBLKFORMAT_1
```
#### Returns
`true` if all queried formats are installed or if `count` is zero, `false`
otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_is_nnpa_datatype_installed
#### Description
Query, from zDNN internal memory, if the requested NNPA data types are installed.
#### Format
```C
bool zdnn_is_nnpa_datatype_installed(uint16_t types_bitmask);
```
#### Parameters
- `uint16_t types_bitmask`
- OR'd type bitmasks as defined in zdnn_query_datatypes enum
```
QUERY_DATATYPE_INTERNAL1
QUERY_DATATYPE_BINARY_FP32
QUERY_DATATYPE_BINARY_INT8
QUERY_DATATYPE_BINARY_INT32
```
#### Returns
`true` if all queried data types are installed, `false` otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
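A brief usage sketch, OR'ing two of the bitmask values listed above; the wrapper name is illustrative:

```C
// Minimal sketch: verify the internal DLFLOAT16 representation and the binary
// INT8 type in a single query.
#include <stdbool.h>
#include "zdnn.h"

bool quantization_types_available(void) {
  return zdnn_is_nnpa_datatype_installed(QUERY_DATATYPE_INTERNAL1 |
                                         QUERY_DATATYPE_BINARY_INT8);
}
```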
---
### zdnn_is_nnpa_layout_fmt_installed
#### Description
Query, from zDNN internal memory, if the requested NNPA data layout formats are
installed.
#### Format
```C
bool zdnn_is_nnpa_layout_fmt_installed(uint32_t layout_bitmask);
```
#### Parameters
- `uint32_t layout_bitmask`
- OR'd layout bitmasks as defined in zdnn_query_layoutfmts enum
```
QUERY_LAYOUTFMT_4DFEATURE
QUERY_LAYOUTFMT_4DKERNEL
QUERY_LAYOUTFMT_4DWEIGHTS
QUERY_LAYOUTFMT_4DGENERIC
```
#### Returns
`true` if all queried data layouts are installed, `false` otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_is_nnpa_conversion_installed
#### Description
Query, from zDNN internal memory, if requested NNPA data-type to/from BFP format
conversions are installed.
#### Format
```C
bool zdnn_is_nnpa_conversion_installed(nnpa_data_type type,
uint16_t format_bitmask);
```
#### Parameters
- `nnpa_data_type type`
- NNPA data-type number as defined in nnpa_data_type enum
```
NNPA_DATATYPE_1
```
- `uint16_t format_bitmask`
- OR'd BFP format bitmasks as defined in zdnn_query_bfpfmts enum
```
QUERY_BFPFMT_TINY (FP16)
QUERY_BFPFMT_SHORT (FP32/BFLOAT)
```
#### Returns
`true` if all queried conversions are installed, `false` otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_library_version
#### Description
Retrieve library version number as a 32-bit hex value in the form
`0x00[major][minor][patch]` where each segment is 1 byte. For example, zDNN 1.2.3
would return `0x00010203`.
This is the version of the libzdnn package installed on the system or of the zDNN
library embedded in a runtime application. The zDNN library version is independent
of the system that zDNN is running on.
The library version indicates what zDNN APIs exist in that version of the zDNN
library. It does **NOT** indicate whether those APIs are available for use. To
check API availability at runtime, see
[Validating the environment at runtime](#runtime-val).
#### Format
```C
uint32_t zdnn_get_library_version();
```
#### Returns
Library version number in `0x00[major][minor][patch]` format.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
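A minimal sketch decoding the packed version value (assuming the `zdnn.h` header
and standard C):
```C
#include <stdint.h>
#include <stdio.h>
#include "zdnn.h"
// Decode the 0x00[major][minor][patch] value into its components.
void print_library_version(void) {
  uint32_t ver = zdnn_get_library_version();
  printf("zDNN library %u.%u.%u\n", (ver >> 16) & 0xFF, (ver >> 8) & 0xFF,
         ver & 0xFF);
}
```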
---
### zdnn_get_library_version_str
#### Description
Retrieve the library version number and build information as a string.
#### Format
```C
char *zdnn_get_library_version_str();
```
#### Returns
Library version number and build information as a string.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_refresh_nnpa_query_result
#### Description
Refresh zDNN in-memory query result from zAIU.
#### Format
```C
zdnn_status zdnn_refresh_nnpa_query_result();
```
#### Parameters
None
#### Programming Notes
This is called automatically as a part of `zdnn_init` and should not need to be
called directly. Manually refreshing query results before making other
`zdnn_query_*` calls may noticeably impact performance.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_UNAVAILABLE_FUNCTION`
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_getsize_ztensor
#### Description
Used to determine the buffer size required for the transformed tensor (including
concatenated) in zDNN transformed format. Requires tensor descriptor
(`zdnn_tensor_desc`) with transformed shape information.
#### Format
```C
uint64_t zdnn_getsize_ztensor(const zdnn_tensor_desc *tfrmd_desc);
```
#### Parameters
- `zdnn_tensor_desc *tfrmd_desc`
- Contains transformed information about the shape, layout and data type.
#### Returns
- Required buffer size in bytes.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_getrange_ztensor
#### Description
Used to determine the minimum negative value and maximum positive value of the
passed zdnn_ztensor, storing the results in min and max.
#### Format
```C
void zdnn_getrange_ztensor(const zdnn_ztensor *ztensor, float *min, float *max);
```
#### Parameters
- `const zdnn_ztensor *ztensor`
- The zdnn_ztensor to return the min and max value of.
- `float *min`
- Pointer to a float used to store minimum negative value.
- If all values are positive, -0.0 will be used instead.
- `float *max`
- Pointer to a float used to store maximum positive value.
- If all values are negative, 0.0 will be used instead.
#### Returns
- None
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_max_limit
#### Description
Returns the maximum representable value between a transformed and
pre-transformed zdnn_data_type.
#### Format
```C
zdnn_status zdnn_get_max_limit(zdnn_data_types transformed_type,
zdnn_data_types pre_transformed_type, void *limit);
```
#### Parameters
- `zdnn_data_types transformed_type`
- input zdnn transformed data type.
- Restricted to the following transformed data types:
- ZDNN_DLFLOAT16
- ZDNN_BINARY_INT8
- ZDNN_BINARY_INT32
- `zdnn_data_types pre_transformed_type`
- input zdnn pre-transformed data type.
  - Restricted to the following pre-transformed data types:
- INT32
- INT8
- FP32
- FP16
- BFLOAT
- `void *limit`
  - pointer to the maximum value representable by both transformed_type and
    pre_transformed_type, expressed in the data type of pre_transformed_type.
#### Returns
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE` - invalid transformed or pre_transformed `type` used and
conversion could not be completed.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
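A minimal sketch (the type pairing is illustrative; `limit` must be of the
pre-transformed type, here FP32):
```C
// Query the largest value representable when FP32 data is transformed to
// DLFLOAT16; values above 'limit' exceed the transformed range.
float limit = 0.0f;
zdnn_status status = zdnn_get_max_limit(ZDNN_DLFLOAT16, FP32, &limit);
if (status == ZDNN_OK) {
  // 'limit' now holds the maximum representable value as an FP32 number.
}
```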
---
### zdnn_get_min_limit
#### Description
Return the minimum representable value between a transformed and pre-transformed
zdnn_data_type.
#### Format
```C
zdnn_status zdnn_get_min_limit(zdnn_data_types transformed_type,
zdnn_data_types pre_transformed_type, void *limit);
```
#### Parameters
- `zdnn_data_types transformed_type`
- input zdnn transformed data type.
- Restricted to the following transformed data types:
- ZDNN_DLFLOAT16
- ZDNN_BINARY_INT8
- ZDNN_BINARY_INT32
- `zdnn_data_types pre_transformed_type`
- input zdnn pre-transformed data type.
  - Restricted to the following pre-transformed data types:
- INT32
- INT8
- FP32
- FP16
- BFLOAT
- `void *limit`
  - pointer to the minimum value representable by both transformed_type and
    pre_transformed_type, expressed in the data type of pre_transformed_type.
#### Returns
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE` - invalid transformed or pre_transformed `type` used and
conversion could not be completed.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_init_pre_transformed_desc
#### Description
Initialize tensor descriptor (`zdnn_tensor_desc`) struct with pre-transformed
(original) shape information.
#### Format
```C
void zdnn_init_pre_transformed_desc(zdnn_data_layouts layout,
zdnn_data_types type,
zdnn_tensor_desc *pre_tfrmd_desc, ...);
```
#### Parameters
- `zdnn_data_layouts layout`
- data layout
- `zdnn_data_types type`
- data type
- `zdnn_tensor_desc *pre_tfrmd_desc`
- output zdnn_tensor_desc struct
- `... (additional arguments)`
- Variadic: number of elements in each dimension in accordance to the layout,
in outermost to innermost order
#### Returns
- None
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
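A minimal sketch showing the outermost-to-innermost ordering of the variadic
dimension arguments (shape values are illustrative):
```C
// NHWC tensor of shape (N=1, H=28, W=28, C=3) with FP16 element data.
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 28, 28, 3);
```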
---
### zdnn_generate_transformed_desc
#### Description
Generate transformed tensor descriptor information based on supplied
pre-transformed tensor descriptor.
#### Format
```C
zdnn_status zdnn_generate_transformed_desc(
const zdnn_tensor_desc *pre_tfrmd_desc, zdnn_tensor_desc *tfrmd_desc);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_tensor_desc *tfrmd_desc`
- output `zdnn_tensor_desc` struct
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE` - pre-transformed `type` is not recognized or is a type
only used for quantized ztensors.
- `ZDNN_INVALID_LAYOUT` - pre-transformed `layout` is not recognized or is a
layout only used for concatenated tensors.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
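A minimal sketch chaining descriptor setup, transformed-descriptor generation, and
buffer-size calculation (shape values are illustrative):
```C
// Pre-transformed 3DS descriptor -> transformed descriptor -> buffer size.
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_tfrmd_desc, 2, 32, 64);
if (zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc) == ZDNN_OK) {
  uint64_t bytes = zdnn_getsize_ztensor(&tfrmd_desc);
  // 'bytes' is the size a caller-provided, 4K-aligned buffer must have.
}
```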
---
### zdnn_generate_quantized_transformed_desc
#### Description
Generate quantized transformed tensor descriptor information based on supplied
pre-transformed tensor descriptor and quantized transform type.
#### Format
```C
zdnn_status zdnn_generate_quantized_transformed_desc(
const zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_quantized_transform_types transform_type,
zdnn_tensor_desc *tfrmd_desc);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- Has the following additional restrictions:
- Only the following pre-transformed layouts are supported.
- ZDNN_1D
- ZDNN_2D
- ZDNN_2DS
- ZDNN_3D
- ZDNN_3DS
- ZDNN_4D
- ZDNN_NHWC
- `zdnn_quantized_transform_types transform_type`
- Type of quantized transformation
- QUANTIZED_DLFLOAT16
- QUANTIZED_INT8
- QUANTIZED_WEIGHTS_INT8
- `zdnn_tensor_desc *tfrmd_desc`
- output `zdnn_tensor_desc` struct
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE` - pre-transformed `type` is not recognized, not supported
for quantized ztensors: [Quantized zTensor Requirements](#quan-zten-reqs)
- `ZDNN_INVALID_LAYOUT` - pre-transformed `layout` is not recognized, not
supported for quantized ztensors, or is a layout only used for concatenated
tensors.
- `ZDNN_INVALID_TRANSFORM_TYPE` - Invalid transformation type:
[Quantized zTensor Requirements](#quan-zten-reqs)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_generate_transformed_desc_concatenated
#### Description
Generate concatenated transformed tensor descriptor information for RNN
input-gates tensors based on a supplied pre-transformed tensor descriptor.
#### Format
```C
zdnn_status zdnn_generate_transformed_desc_concatenated(
const zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_concat_info info, zdnn_tensor_desc *tfrmd_desc);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_concat_info info`
- Information about how the tensors will be concatenated, consists of the
RNN_TYPE, PREV_LAYER and USAGE flags OR'd together:
RNN_TYPE flags:
- RNN_TYPE_LSTM - For LSTM
- RNN_TYPE_GRU - For GRU
PREV_LAYER flags:
- PREV_LAYER_UNI - Previous RNN layer is uni-directional
- PREV_LAYER_NONE - Previous layer is not a RNN layer
- PREV_LAYER_BIDIR - Previous RNN layer is bi-directional
USAGE flags:
- USAGE_WEIGHTS - Concatenate as input weights
- USAGE_HIDDEN_WEIGHTS - Concatenate as input hidden-weights
- USAGE_BIASES - Concatenate as input biases
- USAGE_HIDDEN_BIASES - Concatenate as input hidden-biases
- `zdnn_tensor_desc *tfrmd_desc`
- output `zdnn_tensor_desc` struct
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE` - pre-transformed `type` is not recognized or is not
supported for concatenated tensors.
- `ZDNN_INVALID_LAYOUT` - pre-transformed `layout` is not recognized or is not
supported for concatenated tensors.
- `ZDNN_INVALID_CONCAT_INFO` - invalid concatenation information.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
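A minimal sketch building the OR'd concatenation info for LSTM input weights (the
flag combination is illustrative; the descriptors are assumed to be declared as in
the earlier sketches):
```C
// LSTM input weights where the previous RNN layer is uni-directional.
zdnn_concat_info info = RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS;
zdnn_status status = zdnn_generate_transformed_desc_concatenated(
    &pre_tfrmd_desc, info, &tfrmd_desc);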
---
### zdnn_init_ztensor
#### Description
Initialize a `zdnn_ztensor` struct using the pre-transformed and transformed
tensor shape information.
#### Format
```C
void zdnn_init_ztensor(zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_tensor_desc *tfrmd_desc, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_tensor_desc *tfrmd_desc`
- input tensor descriptor with transformed shape information
- `zdnn_ztensor *output`
- The `zdnn_ztensor` struct being initialized.
#### Returns
- None
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_init_ztensor_with_malloc
#### Description
Same functionality as `zdnn_init_ztensor`, but also computes the size required
for the tensor in the zDNN transformed format and allocates the storage for it.
Sets the `buffer` and `buffer_size` fields within `output`.
#### Format
```C
zdnn_status zdnn_init_ztensor_with_malloc(zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_tensor_desc *tfrmd_desc,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_tensor_desc *tfrmd_desc`
- input tensor descriptor with transformed shape information
- `zdnn_ztensor *output`
- The `zdnn_ztensor` struct being initialized.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `tfrmd_desc->format` is not recognized.
- `ZDNN_INVALID_TYPE` - `tfrmd_desc->type` is not recognized or is a
pre_tfrmd_desc type.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `tfrmd_desc->dim*` dimensions is 0.
- One of `tfrmd_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- Note: concatenation dimensions have a smaller maximum size. See
[LSTM](#lstm-hid_sz) or [GRU](#gru-hid_sz).
- The total number of tfrmd_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_ALLOCATION_FAILURE` - Unable to allocate required memory on a 4K
boundary.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
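A minimal end-to-end initialization sketch (shape and types are illustrative;
error handling is reduced to a single status check):
```C
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
// Describe the original NHWC FP32 data, derive the transformed descriptor,
// then let the library allocate the 4K-aligned transformed-area storage.
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 4, 4, 8);
zdnn_status status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
if (status == ZDNN_OK) {
  status = zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
  // On ZDNN_OK, ztensor.buffer / ztensor.buffer_size are set; release the
  // storage later with zdnn_free_ztensor_buffer(&ztensor).
}
```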
---
### zdnn_init_quantized_ztensor
#### Description
Initialize a `zdnn_ztensor` struct using the pre-transformed and quantized
transformed tensor shape information along with scale and offset.
#### Format
```C
void zdnn_init_quantized_ztensor(zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_tensor_desc *tfrmd_desc, float scale,
float offset, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_tensor_desc *tfrmd_desc`
- input tensor descriptor with quantized transformed shape information
- `float scale`
- scale for quantized ztensor, must not be 0.
- `float offset`
- offset for quantized ztensor
- `zdnn_ztensor *output`
- The `zdnn_ztensor` struct being initialized.
#### Programming Notes
- The reciprocal of the `scale` value is stored as `output->rec_scale` and is
used within subsequent quantized calls with reduced precision. Due to this,
large `scale` values will lead to a `output->rec_scale` that underflows to 0.0
and will result in an error in subsequent quantized calls.
- The `offset` value is stored as `output->offset` and is used within subsequent
quantized calls with reduced precision. Due to this, large `offset` values
will overflow to infinity and will result in an error in subsequent quantized
calls.
#### Returns
- None
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_init_quantized_ztensor_with_malloc
#### Description
Same functionality as `zdnn_init_quantized_ztensor`, but also computes the size
required for the tensor in the zDNN transformed format and allocates the storage
for it. Sets the `buffer` and `buffer_size` fields within `output`.
#### Format
```C
zdnn_status zdnn_init_quantized_ztensor_with_malloc(
zdnn_tensor_desc *pre_tfrmd_desc, zdnn_tensor_desc *tfrmd_desc, float scale,
float offset, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_tensor_desc *pre_tfrmd_desc`
- input tensor descriptor with pre-transformed shape information
- `zdnn_tensor_desc *tfrmd_desc`
- input tensor descriptor with quantized transformed shape information
- `float scale`
- scale for quantized ztensor, must not be 0.
- `float offset`
- offset for quantized ztensor
- `zdnn_ztensor *output`
- The `zdnn_ztensor` struct being initialized.
#### Programming Notes
- The reciprocal of the `scale` value is stored as `output->rec_scale` and is
used within subsequent quantized calls with reduced precision. Due to this,
large `scale` values will lead to a `output->rec_scale` that underflows to 0.0
and will result in an error in subsequent quantized calls.
- The `offset` value is stored as `output->offset` and is used within subsequent
quantized calls with reduced precision. Due to this, large `offset` values
will overflow to infinity and will result in an error in subsequent quantized
calls.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `tfrmd_desc->format` is not recognized.
- `ZDNN_INVALID_TYPE` - `tfrmd_desc->type` is not recognized or is a
pre_tfrmd_desc type.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `tfrmd_desc->dim*` dimensions is 0.
- One of `tfrmd_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- The total number of tfrmd_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_ALLOCATION_FAILURE` - Unable to allocate required memory on a 4K
boundary.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
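A minimal sketch for a quantized INT8 ztensor (shape, scale, and offset are
illustrative; see [Quantized zTensor Requirements](#quan-zten-reqs) for the
supported type/transform pairings):
```C
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor qztensor;
zdnn_init_pre_transformed_desc(ZDNN_2D, INT8, &pre_tfrmd_desc, 32, 64);
zdnn_status status = zdnn_generate_quantized_transformed_desc(
    &pre_tfrmd_desc, QUANTIZED_INT8, &tfrmd_desc);
if (status == ZDNN_OK) {
  // scale must not be 0; scale and offset are recorded in the ztensor for use
  // by subsequent quantized calls.
  status = zdnn_init_quantized_ztensor_with_malloc(
      &pre_tfrmd_desc, &tfrmd_desc, 0.05f, 10.0f, &qztensor);
}
```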
---
### zdnn_is_quantized_ztensor
#### Description
Check whether a given `zdnn_ztensor` represents a quantized ztensor.
#### Format
```C
bool zdnn_is_quantized_ztensor(zdnn_ztensor *ztensor);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- The `zdnn_ztensor` being checked.
#### Returns
`true` if `zdnn_ztensor` represents a quantized ztensor, `false` if not.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_reset_ztensor
#### Description
Reset a `zdnn_ztensor` struct for reuse.
_Note this operation does not set or reset the `buffer` and `buffer_size` fields
nor free the transformed area storage._
#### Format
```C
void zdnn_reset_ztensor(zdnn_ztensor *ztensor);
```
#### Parameters
- `zdnn_ztensor *ztensor`
  - The `zdnn_ztensor` struct being reset.
#### Returns
- None
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_allochelper_ztensor
#### Description
Calculate the size required for the tensor in the zDNN transformed format and
allocate the needed storage, satisfying alignment requirements. Sets `buffer`
and `buffer_size` fields within `ztensor`.
_Note that the calling application assumes ownership of this storage and is
responsible for freeing it._
#### Format
```C
zdnn_status zdnn_allochelper_ztensor(zdnn_ztensor *ztensor);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- A `zdnn_ztensor` struct that contains the transformed shape information in
the `transformed_desc` field.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `ztensor->transformed_desc->format` is not recognized.
- `ZDNN_INVALID_TYPE` - `ztensor->transformed_desc->type` is not recognized or
is a pre_transformed_desc type.
- `ZDNN_INVALID_LAYOUT` - `zdnn_ztensor->transformed_desc->layout` is not
recognized or is not a valid transformed_desc layout.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `ztensor->transformed_desc->dim*` dimensions is 0.
- One of `ztensor->transformed_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- Note: concatenation dimensions have a smaller maximum size. See
[LSTM](#lstm-hid_sz) or [GRU](#gru-hid_sz).
- The total number of transformed_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_ALLOCATION_FAILURE` - Unable to allocate required memory on a 4K
boundary.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_free_ztensor_buffer
#### Description
Given an input zdnn_ztensor, zdnn_free_ztensor_buffer will free the transformed
area storage associated with it.
_Note that the routine does not free the storage allocated for the zdnn_ztensor
struct itself._
#### Format
```C
zdnn_status zdnn_free_ztensor_buffer(const zdnn_ztensor *ztensor);
```
#### Parameters
- `zdnn_ztensor *ztensor`
  - A `zdnn_ztensor` struct whose `buffer` field points to the allocated storage.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_BUFFER` - `tensor->buffer` is `NULL`
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_status_message
#### Description
Retrieve the status message associated with a status code.
#### Format
```C
const char *zdnn_get_status_message(zdnn_status status);
```
#### Parameters
- `zdnn_status status`
- Status code
#### Returns
Pointer to the description string or "(Status string is not defined.)" if
`status` is not defined.
#### Since
1.0.0
#### Requirements
- Any System Z hardware level
See [Validating the environment at runtime](#runtime-val).
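A minimal error-reporting sketch (the descriptors and ztensor are assumed to be
declared as in the earlier sketches; `<stdio.h>` is assumed):
```C
zdnn_status status =
    zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
if (status != ZDNN_OK) {
  fprintf(stderr, "zDNN error: %s\n", zdnn_get_status_message(status));
}
```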
---
### zdnn_reshape_ztensor
#### Description
Reshape and copy buffer content from source zTensor's buffer to destination
zTensor's in accordance to destination zTensor's shape.
The following conditions must be satisfied:
- Both tensor's transformed_desc must be fully initialized
- `dest->buffer` must be pre-allocated
- `src` must be transformed
- `dest` must not already be transformed
- Both `transformed_desc->layout` must be the same and either NHWC or HWCK
- Both zTensors must contain equal number of elements
#### Format
```C
zdnn_status zdnn_reshape_ztensor(const zdnn_ztensor *src, zdnn_ztensor *dest);
```
#### Parameters
- `src`
- Source zTensor to copy from
- `dest`
- Destination zTensor to copy to
#### Programming Notes
- If `src` and `dest` have the same `transformed_desc->dim1` dimension size, the
transformed data is directly copied to the destination without
untransformation.
- If `src` and `dest` have different `transformed_desc->dim1` dimension sizes,
reshaping will internally un-transform the source and then re-transform the
values into the destination.
#### Returns
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- `src`'s and `dest`'s `transformed_desc->dim*` total to different numbers of
elements.
- One of `dest->transformed_desc->dim*` dimensions is 0.
- One of `dest->transformed_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- Note: concatenation dimensions have a smaller maximum size. See
[LSTM](#lstm-hid_sz) or [GRU](#gru-hid_sz).
  - The total number of `dest->transformed_desc->dim*` elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_INVALID_LAYOUT` - (if any of the following are true)
- `src`'s and `dest`'s `transformed_desc->layout` are not the same.
- `transformed_desc->layout` is not `ZDNN_NHWC` nor `ZDNN_HWCK`.
- `src->pre_transformed_desc->layout` is not recognized or is not a valid
pre_transformed_desc layout.
- `dest->pre_transformed_desc->layout` is not recognized or is not a valid
pre_transformed_desc layout.
- `ZDNN_INVALID_STATE` - (if any of the following are true)
- `src` is not already transformed.
- `dest` is already transformed.
- `ZDNN_INVALID_FORMAT` - `src->transformed_desc->format` is not
`ZDNN_FORMAT_4DFEATURE`.
- `ZDNN_INVALID_TYPE` (if any of the following are true)
- `src->pre_transformed_desc->type` is not recognized or is a transformed_desc
type.
- `dest->pre_transformed_desc->type` is not recognized or is a
transformed_desc type.
- `dest->transformed_desc->type` is not recognized or is a
pre_transformed_desc type.
- `ZDNN_INVALID_BUFFER` (if any of the following are true)
- `src->buffer` is `NULL`.
- `src->buffer` is not on a 4K boundary.
- `dest->buffer` is `NULL`.
- `dest->buffer` is not on a 4K boundary.
- `dest->buffer_size` is too small to hold transformed values.
- `ZDNN_CONVERT_FAILURE` - Values failed to un-transform or transform.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
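A minimal sketch (both ztensors are hypothetical and must already satisfy the
conditions listed above: `src_ztensor` transformed, `dest_ztensor` initialized
with its own descriptors and a pre-allocated buffer but not yet transformed, same
NHWC layout and element count):
```C
// Example: reshape a transformed 1x4x4x8 NHWC ztensor into a 1x2x8x8 NHWC
// destination (both hold 128 elements).
zdnn_status status = zdnn_reshape_ztensor(&src_ztensor, &dest_ztensor);
```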
---
### zdnn_is_version_runnable
#### Description
Check whether an application built for zDNN version `ver_num` can run on the
current zAIU hardware with the installed zDNN library.
#### Format
```C
bool zdnn_is_version_runnable(uint32_t ver_num);
```
#### Parameters
- `ver_num`
- Version number of the zDNN library application itself, in
0x00\[major\]\[minor\]\[patch\] form. Typically this is the ZDNN_VERNUM used
to compile the application
#### Returns
- `true` if the application can run in the current environment, `false` otherwise.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_get_max_runnable_version
#### Description
Returns the maximum version number associated with the APIs supported by the
hardware and zDNN software in the current environment. This can be compared with
the version documented in the "Requirements" section of each programming interface
to discern whether the interface is supported at run-time.
The returned value is a version number in the `major`.`minor` format. APIs
defined at that level and below will be supported in the current environment.
#### Format
```C
uint32_t zdnn_get_max_runnable_version();
```
#### Parameters
- None
#### Returns
- A 32-bit zDNN version number in `0x00[major][minor]FF` form.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
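A minimal sketch gating optional function use on the runtime environment
(`ZDNN_VERNUM` is the compile-time version value mentioned above):
```C
// Verify the environment supports everything this application was built for.
if (!zdnn_is_version_runnable(ZDNN_VERNUM)) {
  // fall back to a non-zAIU code path
}
uint32_t max_ver = zdnn_get_max_runnable_version();
// e.g. 0x000101FF: APIs defined at level 1.1 and below are usable.
```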
---
## Data Transformation
[Back to Table of Contents](#TOC)
- [Transform to zTensor](#zdnn_transform_ztensor)
- [Transform to zTensor with saturation](#zdnn_transform_ztensor_with_saturation)
- [Transform to quantized zTensor](#zdnn_transform_quantized_ztensor)
- [Transform to Original](#zdnn_transform_origtensor)
---
zAIU requires the tensor data to be arranged in a format that enhances the
performance characteristics of the operations. In this documentation, it is
referred to as "transformed format". In addition, data conversions are necessary
from the common formats (FP32, FP16, BFLOAT) to the formats supported by the
zAIU (DLFLOAT16, INT8). The following functions are provided:
- `zdnn_transform_ztensor` and `zdnn_transform_ztensor_with_saturation`
- These functions will transform the input tensor and convert the input data
to the format required by the zAIU. The resulting transformed ztensor can be
reused as many times as necessary.
- See [zdnn_transform_ztensor](#zdnn_transform_ztensor) and
[zdnn_transform_ztensor_with_saturation](#zdnn_transform_ztensor_with_saturation)
for details and restrictions on transforming an input tensor to the internal
format.
- `zdnn_transform_origtensor`
- zdnn_transform_origtensor transforms a ztensor (usually output from an
operation or network) to the format and data types that are usable by the
application.
  - See [zdnn_transform_origtensor](#zdnn_transform_origtensor) for details on
    transforming a ztensor back from the internal format.
---
### zdnn_transform_ztensor
#### Description
Converts the input tensor to the supported transformed format for execution by
zdnn operations. If transformation is successful the `is_transformed` field
within `ztensor` will be set to `true` otherwise it is set to `false`.
Transformation will fail if `is_transformed` was already `true`.
_Note that the tensor layout in memory, once in transformed format, is dependent
on the content of the input tensor's descriptors (`zdnn_tensor_desc` fields).
Once converted, a `zdnn_ztensor` should only be manipulated by zDNN API
functions._
#### Format
```C
zdnn_status zdnn_transform_ztensor(zdnn_ztensor *ztensor, ...);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- The input `zdnn_ztensor` struct. `pre_transformed_desc` and
`transformed_desc` must be set, `is_transformed` must be `false`. A
4k-aligned tensor storage must be pre-allocated by the caller (directly or
by calling the zDNN allocation helper function) and field `buffer` must
point to the storage.
- `... (additional arguments)`
- Variadic: list of pointers for input data to be transformed:
- Non-concatenated: 1 data pointer
- LSTM concatenated: 4 data pointers, one for each input gate in Forget,
Input, Cell, Output (FICO) order
- GRU concatenated: 3 data pointers, one for each input gate in (Z)update,
Reset, Hidden, (ZRH) gate order
#### Programming Notes
- This function clears the per-thread floating-point exception flags at entry,
and may set `FE_UNDERFLOW` / `FE_INVALID` / `FE_INEXACT` / `FE_OVERFLOW` when
it encounters errors during data conversion.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `zdnn_ztensor->transformed_desc->format` is not
recognized.
- `ZDNN_INVALID_LAYOUT` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->layout` is not recognized or is not a
valid pre_transformed_desc layout.
- `zdnn_ztensor->transformed_desc->layout` is not recognized or is not a valid
transformed_desc layout.
- `ZDNN_INVALID_TYPE` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->type` is not recognized or is a
transformed_desc type.
- `zdnn_ztensor->transformed_desc->type` is not recognized or is a
pre_transformed_desc type.
- `ZDNN_INVALID_BUFFER` (if any of the following are true)
- `buffer` is `NULL`.
- `buffer` is not on a 4K boundary.
- `buffer_size` is too small to hold transformed values.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is 0.
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- Note: concatenation dimensions have a smaller maximum size. See
[LSTM](#lstm-hid_sz) or [GRU](#gru-hid_sz).
- The total number of transformed_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_INVALID_STATE` - Tensor is already transformed.
- `ZDNN_CONVERT_FAILURE` - Values failed to transform.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - unsupported transformation function.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
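A minimal sketch transforming caller data into the ztensor initialized in the
earlier sketches (`host_data` is a hypothetical application buffer holding the
FP32 values in pre-transformed NHWC order; `<stdio.h>` is assumed):
```C
float host_data[1 * 4 * 4 * 8]; /* filled by the application */
zdnn_status status = zdnn_transform_ztensor(&ztensor, host_data);
if (status != ZDNN_OK) {
  fprintf(stderr, "transform failed: %s\n", zdnn_get_status_message(status));
}
```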
---
### zdnn_transform_ztensor_with_saturation
#### Description
Converts the input tensor to the supported transformed format for execution by
zdnn operations. If during transformation, an element results in a value that
exceeds the smallest or largest value that can be represented by DLFLOAT16, the
resulting element will contain the smallest or largest value and no
range-violation status will be triggered. If transformation is successful the
`is_transformed` field within `ztensor` will be set to `true` otherwise it is
set to `false`. Transformation will fail if `is_transformed` was already `true`.
_Note that the tensor layout in memory, once in transformed format, is dependent
on the content of the input tensor's descriptors (`zdnn_tensor_desc` fields).
Once converted, a `zdnn_ztensor` should only be manipulated by zDNN API
functions._
#### Format
```C
zdnn_status zdnn_transform_ztensor_with_saturation(zdnn_ztensor *ztensor, ...);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- The input `zdnn_ztensor` struct. `pre_transformed_desc` and
`transformed_desc` must be set, `is_transformed` must be `false`. A
4k-aligned tensor storage must be pre-allocated by the caller (directly or
by calling the zDNN allocation helper function) and field `buffer` must
point to the storage.
- Has the following additional restrictions:
- Only non-quantized ztensors are supported. Use
`zdnn_transform_quantized_ztensor` if required.
- `... (additional arguments)`
- Variadic: list of pointers for input data to be transformed:
- 1 data pointer supported at this time.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_ELEMENT_RANGE_VIOLATION`
- `ZDNN_INVALID_FORMAT` - `zdnn_ztensor->transformed_desc->format` is not
ZDNN_FORMAT_4DFEATURE.
- `ZDNN_INVALID_LAYOUT` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->layout` is not recognized or is not a
valid pre_transformed_desc layout.
- `zdnn_ztensor->transformed_desc->layout` is not recognized or is not a valid
transformed_desc layout.
- `ZDNN_INVALID_TYPE` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->type` is not recognized or is not a
valid pre_transformed_desc type.
- `zdnn_ztensor->transformed_desc->type` is not recognized or is not a valid
transformed_desc type.
- `ZDNN_INVALID_BUFFER` (if any of the following are true)
- `buffer` is `NULL`.
- `buffer` is not on a 4K boundary.
- `buffer_size` is too small to hold transformed values.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is 0.
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- The total number of transformed_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_INVALID_STATE` - Tensor is already transformed.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - unsupported transformation function.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_transform_quantized_ztensor
#### Description
Converts the input tensor to the supported quantized transformed format for
execution by zdnn operations. If transformation is successful the
`is_transformed` field within `ztensor` will be set to `true` otherwise it is
set to `false`. Transformation will fail if `is_transformed` was already `true`.
_Note that the tensor layout in memory, once in transformed format, is dependent
on the content of the input tensor's descriptors (`zdnn_tensor_desc` fields).
Once converted, a `zdnn_ztensor` should only be manipulated by zDNN API
functions._
#### Format
```C
zdnn_status zdnn_transform_quantized_ztensor(zdnn_ztensor *ztensor,
bool saturation_control,
int8_t clip_min, int8_t clip_max,
const void *data);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- The input `zdnn_ztensor` struct. `pre_transformed_desc` and
`transformed_desc` must be set, `is_transformed` must be `false`. A
4k-aligned tensor storage must be pre-allocated by the caller (directly or
by calling the zDNN allocation helper function) and field `buffer` must
point to the storage.
- Has the following additional restrictions:
- Only the following pre-transformed layouts are supported.
- ZDNN_1D
- ZDNN_2D
- ZDNN_2DS
- ZDNN_3D
- ZDNN_3DS
- ZDNN_4D
- ZDNN_NHWC
- Only NHWC transformed layout is supported.
- See [Quantized zTensor Requirements](#quan-zten-reqs) for supported
transform types.
- `bool saturation_control`
- When enabled and an element results in a value that exceeds the smallest or
largest value that can be represented by DLFLOAT16, the resulting element
will contain the smallest or largest value and no range-violation status
will be triggered.
- Only applicable when all the following are true:
- `zdnn_ztensor *tensor` is of zdnn_quantized_transform_types
QUANTIZED_DLFLOAT16.
- The `pre_transformed_desc` `type` of the `zdnn_ztensor *tensor` is FP32.
- `int8_t clip_min`
- Minimum clipping value
- Only applicable when `zdnn_ztensor *tensor` is of
zdnn_quantized_transform_types QUANTIZED_INT8.
- Must be less than `clip_max`
- `int8_t clip_max`
- Maximum clipping value
- Only applicable when `zdnn_ztensor *tensor` is of
zdnn_quantized_transform_types QUANTIZED_INT8.
- Must be greater than `clip_min`
#### Programming Notes
- This function clears the per-thread floating-point exception flags at entry,
and may set `FE_UNDERFLOW` / `FE_INVALID` / `FE_INEXACT` / `FE_OVERFLOW` when
it encounters errors during data conversion.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `zdnn_ztensor->transformed_desc->format` is not
recognized.
- `ZDNN_INVALID_LAYOUT` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->layout` is not recognized or is not a
valid pre_transformed_desc layout.
- `zdnn_ztensor->transformed_desc->layout` is not recognized or is not a valid
transformed_desc layout.
- `ZDNN_INVALID_TYPE` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->type` is not recognized or is a
transformed_desc type: [Quantized zTensor Requirements](#quan-zten-reqs)
- `zdnn_ztensor->transformed_desc->type` is not recognized or is a
pre_transformed_desc type: [Quantized zTensor Requirements](#quan-zten-reqs)
- `ZDNN_INVALID_BUFFER` (if any of the following are true)
- `buffer` is `NULL`.
- `buffer` is not on a 4K boundary.
- `buffer_size` is too small to hold transformed values.
- `ZDNN_INVALID_SHAPE` - (if any of the following are true)
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is 0.
- One of `zdnn_ztensor->transformed_desc->dim*` dimensions is greater than
[zdnn_get_max_for_dim](#zdnn_get_max_for_dim).
- The total number of transformed_desc elements is larger than
`zdnn_get_nnpa_max_tensor_size`.
- `ZDNN_INVALID_STATE` - Tensor is already transformed.
- `ZDNN_INVALID_CLIPPING_VALUE` - clip_min value is not less than clip_max
value.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Unsupported transformation function.
- `ZDNN_FUNC_RC_F001` - Either scale or offset is non-numeric or scale value
is zero.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
---
### zdnn_transform_origtensor
#### Description
Converts the input tensor from the zDNN transformed format back to a standard
non-transformed layout. The `is_transformed` field within `ztensor` must be
`true`.
All stick format tensors are supported, except:
- Kernel tensors
- Concatenated RNN input-gates tensors
#### Format
```C
zdnn_status zdnn_transform_origtensor(const zdnn_ztensor *ztensor, void *out_buf);
```
#### Parameters
- `zdnn_ztensor *ztensor`
- The input `zdnn_ztensor` struct. `pre_transformed_desc`, `transformed_desc`
and `buffer` must be set, `is_transformed` must be `true`.
- `void *out_buf`
- The buffer for storing the standard non-transformed tensor data. Must be
pre-allocated by the caller.
#### Programming Notes
- This function clears the per-thread floating-point exception flags at entry,
and may set `FE_UNDERFLOW` / `FE_INVALID` / `FE_INEXACT` / `FE_OVERFLOW` when
it encounters errors during data conversion.
#### Returns zdnn_status indications
- `ZDNN_OK`
- `ZDNN_INVALID_FORMAT` - `ztensor->transformed_desc->format` is not
`ZDNN_FORMAT_4DFEATURE`.
- `ZDNN_INVALID_LAYOUT` - (if any of the following are true)
- `zdnn_ztensor->pre_transformed_desc->layout` is not recognized or is not a
valid pre_transformed_desc layout.
- `zdnn_ztensor->transformed_desc->layout` is not recognized or is not a valid
transformed_desc layout required by this function.
- `ZDNN_INVALID_TYPE`
- `ztensor->pre_transformed_desc->type` is not recognized or is a
transformed_desc type.
- `ztensor->transformed_desc->type` is not recognized or is a
pre_transformed_desc type.
- `ZDNN_INVALID_BUFFER` (if any of the following are true)
- `ztensor->buffer` is `NULL`.
- `ztensor->buffer` is not on a 4K boundary.
- `ZDNN_INVALID_STATE` - `ztensor` is not transformed.
- `ZDNN_CONVERT_FAILURE` - Values failed to un-transform.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
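A minimal sketch copying results out of a transformed ztensor (here a hypothetical
`output_ztensor` holding the 1x4x4x8 FP32 shape used in the earlier sketches):
```C
float results[1 * 4 * 4 * 8];
zdnn_status status = zdnn_transform_origtensor(&output_ztensor, results);
// On ZDNN_OK, 'results' holds the data in the original FP32 NHWC layout.
```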
---
## Operations
See [Table of Contents](#TOC) for operations list
---
## Element-wise Operations
[Back to Table of Contents](#TOC)
- [Addition](#zdnn_add)
- [Subtraction](#zdnn_sub)
- [Multiplication](#zdnn_mul)
- [Division](#zdnn_div)
- [Minimum](#zdnn_min)
- [Maximum](#zdnn_max)
- [Natural Logarithm](#zdnn_log)
- [Exponential](#zdnn_exp)
- [Square Root](#zdnn_sqrt)
- [Inverse Square Root](#zdnn_invsqrt)
---
### zdnn_add
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, performs element-wise
addition and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_add(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with addends to add to `input_b` tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with addends to add to `input_a` tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor to hold the result of the addition
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Addition](https://www.tensorflow.org/api_docs/python/tf/math/add)
[ONNX Addition](https://onnx.ai/onnx/operators/onnx__Add.html#l-onnx-doc-add)
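A minimal call sketch; the same pattern applies to the other element-wise
operations (`input_a`, `input_b`, and `output` are assumed to be initialized as
described above, with both inputs already transformed):
```C
zdnn_status status = zdnn_add(&input_a, &input_b, &output);
if (status == ZDNN_OK) {
  // 'output' now holds the transformed sums; use zdnn_transform_origtensor()
  // to copy them back into application memory.
}
```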
---
### zdnn_sub
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, performs element-wise
subtraction and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_sub(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with minuends that will be subtracted by `input_b` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with subtrahends to subtract from `input_a` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor to hold the result of the subtraction
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Subtraction](https://www.tensorflow.org/api_docs/python/tf/math/subtract)
[ONNX Subtraction](https://onnx.ai/onnx/operators/onnx__Sub.html#l-onnx-doc-sub)
---
### zdnn_mul
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, performs element-wise
multiplication and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_mul(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with multiplicands that will be multiplied by `input_b` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with multipliers for `input_a` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor to hold the result of the multiplication.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Multiplication](https://www.tensorflow.org/api_docs/python/tf/math/multiply)
[ONNX Multiplication](https://onnx.ai/onnx/operators/onnx__Mul.html#l-onnx-doc-mul)
---
### zdnn_div
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, performs element-wise
division and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_div(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with dividends that will be divided by `input_b` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with divisors for `input_a` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor to hold the result of the division.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Division](https://www.tensorflow.org/api_docs/python/tf/math/divide)
[ONNX Division](https://onnx.ai/onnx/operators/onnx__Div.html#l-onnx-doc-div)
---
### zdnn_min
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, computes the element-wise
minimum and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_min(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with values that will be compared with `input_b` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with values that will be compared with `input_a` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the smaller value from each comparison of the inputs.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Minimum](https://www.tensorflow.org/api_docs/python/tf/math/minimum)
[ONNX Minimum](https://onnx.ai/onnx/operators/onnx__Min.html#l-onnx-doc-min)
---
### zdnn_max
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given two input tensors in zDNN transformed format, computes the element-wise
maximum and stores the result into the provided output zDNN tensor.
_Note that for zDNN use, broadcasting of the input tensor(s) must be performed
by the caller. As such, the input tensors must be of the same shape._
#### Format
```C
zdnn_status zdnn_max(const zdnn_ztensor *input_a, const zdnn_ztensor *input_b,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with values that will be compared with `input_b` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with values that will be compared with `input_a` tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the larger value from each comparison of the inputs.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Maximum](https://www.tensorflow.org/api_docs/python/tf/math/maximum)
[ONNX Maximum](https://onnx.ai/onnx/operators/onnx__Max.html#l-onnx-doc-max)
---
### zdnn_log
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the natural logarithm
element-wise and stores the result into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_log(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the calculated natural logarithm of each value from
    `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Natural Logarithm](https://www.tensorflow.org/api_docs/python/tf/math/log)
[ONNX Natural Logarithm](https://onnx.ai/onnx/operators/onnx__Log.html#l-onnx-doc-log)
---
### zdnn_exp
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the exponential
element-wise and stores the result into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_exp(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the calculated exponential of each value from `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Exponential](https://www.tensorflow.org/api_docs/python/tf/math/exp)
[ONNX Exponential](https://onnx.ai/onnx/operators/onnx__Exp.html#l-onnx-doc-exp)
---
### zdnn_sqrt
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the square root
element-wise and stores the result into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_sqrt(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the calculated square root of each value from `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Square Root](https://www.tensorflow.org/api_docs/python/tf/math/sqrt)
[ONNX Square Root](https://onnx.ai/onnx/operators/onnx__Sqrt.html#l-onnx-doc-sqrt)
---
### zdnn_invsqrt
- [Back to Table of Contents](#TOC)
- [Back to Element-wise Operations](#elwise-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the inverse square
root element-wise and stores the result into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_invsqrt(const zdnn_ztensor *input, float epsilon,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `float epsilon`
- A float value added to input prior to computation.
- `zdnn_ztensor *output`
- Tensor that holds the calculated inverse square root of each value from
`input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_EPSILON`
- [hardware statuses](#hw-statuses)
#### Programming Notes
- On some models, if an input element, `epsilon`, or both are very large, their
  sum may be a nonnumeric value, and the inverse square root of that value will
  also be nonnumeric. This may occur even though the inverse square root of an
  unconstrained sum would easily fit in the data type of an output-tensor
  element.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Reciprocal Square Root](https://www.tensorflow.org/api_docs/python/tf/math/rsqrt)
---
## Activation Operations
[Back to Table of Contents](#TOC)
- [Rectified Linear](#zdnn_relu)
- [Leaky Rectified Linear](#zdnn_leaky_relu)
- [Hyperbolic Tangent](#zdnn_tanh)
- [Sigmoid](#zdnn_sigmoid)
- [Softmax](#zdnn_softmax)
- [Softmax with Mask](#zdnn_softmax_mask)
- [Gaussian Error Linear Unit](#zdnn_gelu)
---
### zdnn_relu
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format produce an output tensor where
the rectified linear function, y = max(0, x) is applied to the input
element-wise. If an optional clipping_value is provided, clipping is performed
against the intermediate output where z = min(y, clipping_value).
#### Format
```C
zdnn_status zdnn_relu(const zdnn_ztensor *input, const void *clipping_value,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `void *clipping_value`
- A pointer to an FP32 value, used to clip input tensor's elements.
- If set to NULL or 0, no clipping will occur.
- Must not be a negative value.
- `zdnn_ztensor *output`
- Tensor that holds the rectified linear function result of each value from
`input`
- Must follow [general tensor requirements](#gen-zten-reqs)
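For illustration, a clipped call could look like the following sketch (the
tensors are assumed to be already transformed; the clipping value is a
placeholder):
```C
// Sketch only: ReLU with the intermediate result clipped at 6.0.
float clipping_value = 6.0f; // FP32 value passed via a void pointer
zdnn_status status = zdnn_relu(input, (void *)&clipping_value, output);
// Passing NULL instead of &clipping_value would skip clipping.
```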
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_CLIPPING_VALUE`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Rectified Linear](https://www.tensorflow.org/api_docs/python/tf/nn/relu)
[ONNX Rectified Linear](https://onnx.ai/onnx/operators/onnx__Relu.html#l-onnx-doc-relu)
---
### zdnn_leaky_relu
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format produce an output tensor where
the leaky rectified linear function is applied to the input element-wise. The
calculation used depends on the input element. When negative, y = a \* x, where
a is the adjustment factor. When 0 or positive, y = x. If an optional
clipping_value is provided, clipping is performed against the intermediate
output where z = min(y, clipping_value).
#### Format
```C
zdnn_status zdnn_leaky_relu(const zdnn_ztensor *input,
const void *clipping_value,
float adjustment_factor, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `void *clipping_value`
- A pointer to an FP32 value, used to clip input tensor's elements.
- If set to NULL or 0, no clipping will occur.
- Must not be a negative value.
- `float adjustment_factor`
- A float value multiplied with negative elements from input.
- Must not be a negative value.
- Must not be greater than 1.
- `zdnn_ztensor *output`
  - Tensor that holds the leaky rectified linear function result of each value
    from `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
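A minimal call sketch, assuming already-transformed `input` and `output`
tensors and a placeholder adjustment factor:
```C
// Sketch only: leaky ReLU with slope 0.01 for negative elements and no
// clipping of the intermediate result.
zdnn_status status = zdnn_leaky_relu(input, NULL, 0.01f, output);
```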
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_CLIPPING_VALUE`
- `ZDNN_INVALID_ADJUSTMENT_FACTOR`
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Leaky Rectified Linear](https://www.tensorflow.org/api_docs/python/tf/nn/leaky_relu)
[ONNX Leaky Rectified Linear](https://onnx.ai/onnx/operators/onnx__LeakyRelu.html#l-onnx-doc-leakyrelu)
---
### zdnn_tanh
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format, produces an output tensor
where the hyperbolic tangent is applied to the input element-wise.
#### Format
```C
zdnn_status zdnn_tanh(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the hyperbolic tangent result of each value from `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Hyperbolic Tangent](https://www.tensorflow.org/api_docs/python/tf/math/tanh)
[ONNX Hyperbolic Tangent](https://onnx.ai/onnx/operators/onnx__Tanh.html#l-onnx-doc-tanh)
---
### zdnn_sigmoid
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format, produces an output tensor
where the sigmoid function is applied to the input element-wise.
#### Format
```C
zdnn_status zdnn_sigmoid(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the sigmoid result of each value from `input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Sigmoid](https://www.tensorflow.org/api_docs/python/tf/math/sigmoid)
[ONNX Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html#l-onnx-doc-sigmoid)
---
### zdnn_softmax
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the softmax
(normalized exponential) for each vector formed in dimension-1, then if
`act_func` is not `SOFTMAX_ACT_NONE`, the activation function is applied to the
results. Finally stores the results into the provided output zDNN tensor.
_Note: Other parameters, such as axis, are not supported._
#### Format
```C
zdnn_status zdnn_softmax(const zdnn_ztensor *input, void *save_area,
zdnn_softmax_act act_func, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- [ZDNN_3DS](#common-layouts) tensor with pre-transformed shape [batch size,
batch size, vector dimension size] or output from another operation that is
of the correct shape.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `void *save_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
  - The preallocated memory must be at least 8K bytes in size and aligned on a
    4k boundary.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- `zdnn_softmax_act act_func`
- Activation function to apply to the results.
- `SOFTMAX_ACT_NONE` or `SOFTMAX_ACT_LOG`
- `zdnn_ztensor *output`
  - [ZDNN_3DS](#common-layouts) tensor with the same shape as `input` that
    holds the softmax result of each value from `input`.
- Must follow [general tensor requirements](#gen-zten-reqs)
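A minimal call sketch with a caller-provided save area (assuming a C11
`aligned_alloc` is available and that `input` and `output` are already
transformed):
```C
#include <stdlib.h>
// Sketch only: provide an 8K, 4k-aligned save_area instead of letting the
// operation allocate one internally. A real caller should check for NULL.
void *save_area = aligned_alloc(4096, 8192);
zdnn_status status = zdnn_softmax(input, save_area, SOFTMAX_ACT_NONE, output);
free(save_area);
```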
#### Programming Notes
- If all elements of a dimension 1 vector are the largest magnitude negative
number possible for the transformed data type, accuracy may be reduced.
- A `ZDNN_3DS` tensor is expected, where the `transformed_desc` dim1 describes
the vector, and dim2 and dim4 are used to batch multiple vector requests
together. Dim3 must always be 1. The `zdnn_softmax` operation is performed
against the vector in dim1 repeating for each dim1 vector in the dim4 and dim2
dimensions.
- Tensors that cannot be processed as vectors in dim1 or as batches of dim1
vectors must be coerced or reshaped by the caller.
- When the entire tensor is to be processed by softmax, it can be coerced by
simply creating an alternate descriptor prior to zDNN transformation. For
example:
- A 4D tensor with `pre_transformed_desc` dimensions 2x2x2x2 and a data
array of 16 FP32 entries could have an alternate `ZDNN_3DS` layout
`pre_transformed_desc` using dimensions 8x1x2 and use the same original
data array prior to `zdnn_transform_ztensor`. After transformation, such a
tensor would be valid for `zdnn_softmax`.
- In another example, the 4D 2x2x2x2 tensor could be processed as 8 batches
of 2 vectors using a `ZDNN_3DS` layout `pre_transformed_desc` with
dimensions 1x8x2.
- The inner-most dimension must remain the same during this coercion.
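As a sketch of the 2x2x2x2 to 8x1x2 coercion described above (following the
descriptor-initialization style used elsewhere in this document; variable
names are placeholders):
```C
// Sketch only: reuse the original 16-entry FP32 data array, but describe it
// as an 8x1x2 ZDNN_3DS tensor so each dimension-1 vector can be processed by
// zdnn_softmax.
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_tfrmd_desc, 8, 1, 2);
// ... generate the transformed descriptor, initialize the ztensor, and call
// zdnn_transform_ztensor with the original data array ...
```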
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_ALLOCATION_FAILURE` - A preallocated `save_area` was not specified and
internal allocation for the required memory failed.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - input tensor `input->transformed_desc->dim3` was
not 1.
- `ZDNN_FUNC_RC_F001` - Invalid `act_func`
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax)
[ONNX Softmax](https://onnx.ai/onnx/operators/onnx__Softmax.html#l-onnx-doc-softmax)
---
### zdnn_softmax_mask
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format, computes the softmax
(normalized exponential) for each vector formed in dimension-1 (from element
zero to mask - 1), then if `act_func` is not `SOFTMAX_ACT_NONE`, the activation
function is applied to the results. Finally stores the results into the provided
output zDNN tensor.
_Note: Other parameters, such as axis, are not supported._
#### Format
```C
zdnn_status zdnn_softmax_mask(const zdnn_ztensor *input, void *save_area,
zdnn_softmax_act act_func, uint32_t softmax_mask,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- [ZDNN_3DS](#common-layouts) tensor with pre-transformed shape [batch size,
batch size, vector dimension size] or output from another operation that is
of the correct shape.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `void *save_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
  - The preallocated memory must be at least 8K bytes in size and aligned on a
    4k boundary.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- `zdnn_softmax_act act_func`
- Activation function to apply to the results.
- `SOFTMAX_ACT_NONE` or `SOFTMAX_ACT_LOG`
- `uint32_t softmax_mask`
  - 32-bit unsigned binary integer that specifies the count of dimension 1
    elements to be processed.
  - If 0, behavior matches `zdnn_softmax`.
- Must not exceed dimension 1 of input tensor.
- `zdnn_ztensor *output`
  - [ZDNN_3DS](#common-layouts) tensor with the same shape as `input` that
    holds the softmax result of each value from `input`.
- Must follow [general tensor requirements](#gen-zten-reqs)
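A minimal call sketch, assuming already-transformed tensors and a placeholder
`valid_count` no larger than dimension 1 of `input`:
```C
// Sketch only: softmax over just the first valid_count elements of each
// dimension-1 vector; the remaining elements are excluded by the mask.
uint32_t valid_count = 3;
zdnn_status status =
    zdnn_softmax_mask(input, NULL, SOFTMAX_ACT_NONE, valid_count, output);
```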
#### Programming Notes
- If all elements of a dimension 1 vector are the largest magnitude negative
number possible for the transformed data type, accuracy may be reduced.
- A `ZDNN_3DS` tensor is expected, where the `transformed_desc` dim1 describes
the vector, and dim2 and dim4 are used to batch multiple vector requests
together. Dim3 must always be 1. The `zdnn_softmax_mask` operation is
performed against the vector in dim1 repeating for each dim1 vector in the
dim4 and dim2 dimensions.
- Tensors that cannot be processed as vectors in dim1 or as batches of dim1
vectors must be coerced or reshaped by the caller.
- When the entire tensor is to be processed by softmax, it can be coerced by
simply creating an alternate descriptor prior to zDNN transformation. For
example:
- A 4D tensor with `pre_transformed_desc` dimensions 2x2x2x2 and a data
array of 16 FP32 entries could have an alternate `ZDNN_3DS` layout
`pre_transformed_desc` using dimensions 8x1x2 and use the same original
data array prior to `zdnn_transform_ztensor`. After transformation, such a
tensor would be valid for `zdnn_softmax_mask`.
- In another example, the 4D 2x2x2x2 tensor could be processed as 8 batches
of 2 vectors using a `ZDNN_3DS` layout `pre_transformed_desc` with
dimensions 1x8x2.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_ALLOCATION_FAILURE` - A preallocated `save_area` was not specified and
internal allocation for the required memory failed.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - input tensor `input->transformed_desc->dim3` was
not 1.
- `ZDNN_FUNC_RC_F001` - Invalid `act_func`
- `ZDNN_FUNC_RC_F002` - `softmax_mask` exceeds dimension 1 of input tensor.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax)
[ONNX Softmax](https://onnx.ai/onnx/operators/onnx__Softmax.html#l-onnx-doc-softmax)
---
### zdnn_gelu
- [Back to Table of Contents](#TOC)
- [Back to Activation Operations](#act-ops)
#### Description
Given an input tensor in zDNN transformed format produce an output tensor where
the Gaussian Error Linear Unit activation function, y = 0.5 \* x \* (1 +
tanh(x \* 0.7978845608 \* (1 + 0.044715 \* x \* x))), is applied to the input
element-wise.
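For reference, the formula above can be sketched for a single scalar value as
follows (illustration only; the API itself operates on whole transformed
tensors):
```C
#include <math.h>
// Scalar sketch of the GELU tanh approximation quoted above.
static float gelu_ref(float x) {
  return 0.5f * x *
         (1.0f + tanhf(x * 0.7978845608f * (1.0f + 0.044715f * x * x)));
}
```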
#### Format
```C
zdnn_status zdnn_gelu(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor that holds the Gaussian Error Linear Unit results of each value from
`input`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Programming Notes
- The range of certain input-element values may result in an error of greater
  than 1% in the output element; however, the accuracy of properly conditioned
  models is not significantly degraded.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Gaussian Error Linear Unit](https://www.tensorflow.org/api_docs/python/tf/nn/gelu)
[ONNX Gaussian Error Linear Unit](https://onnx.ai/onnx/operators/onnx__Gelu.html#l-onnx-doc-gelu)
---
## Normalization Operations
[Back to Table of Contents](#TOC)
- [Mean Reduce](#zdnn_meanreduce2d)
- [Batch Norm](#zdnn_batchnorm)
- [Normalization](#zdnn_norm)
- [Moments](#zdnn_moments)
- [Layer Normalization](#zdnn_layernorm)
- [Reduce](#zdnn_reduce)
---
### zdnn_meanreduce2d
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given an input tensor in zDNN transformed format, produces a downsampled tensor
reducing the middle dimensions to a size of 1 based on the mean of the original
values and stores the result to the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_meanreduce2d(const zdnn_ztensor *input, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[batch_Num, Height, Width, Channel].
- Height and Width dimension must be less than or equal to 1024.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- The result tensor which will hold the result of the pooling operation in its
buffer.
- Shape:
- `output` dimensions batch_Num and Channel must be the same as the
respective input dimensions.
- `output` dimensions Height and Width must be 1.
- Must follow [general tensor requirements](#gen-zten-reqs)
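A sketch of matching input and output pre-transformed descriptors (placeholder
sizes; this assumes the variadic descriptor-initialization form shown in the
zdnn_reduce examples later in this document also applies to `ZDNN_NHWC`):
```C
// Sketch only: a 1x7x7x64 NHWC input reduced to a 1x1x1x64 NHWC output,
// collapsing Height and Width to 1 as required above.
zdnn_tensor_desc pre_in_desc, pre_out_desc;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_in_desc, 1, 7, 7, 64);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_out_desc, 1, 1, 1, 64);
```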
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE` - Shape of input or output tensor is invalid based on
given kernel and stride parameters
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F001` - `input` tensor has a Height or Width dimension greater
than allowed for `zdnn_meanreduce2d`.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Reduce Mean] with `axis` set for the Height and Width axes and
`keepdims` set to True.
[tensorflow reduce mean]:
https://www.tensorflow.org/api_docs/python/tf/math/reduce_mean
[ONNX Reduce Mean]
[onnx reduce mean]:
https://onnx.ai/onnx/operators/onnx__ReduceMean.html#l-onnx-doc-reducemean
---
### zdnn_batchnorm
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given three input zDNN tensors `input_a`, `input_b`, and `input_c`, computes the
batch-normalized result for each vector formed in dimension-1 as follows:
output = input_b \* input_a + input_c
where `input_b` is a precomputed element-wise division of the scale and variance
tensors, and `input_c` is a precomputed element-wise multiplication of (-1) \*
mean and `input_b`, plus the input bias tensor.
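One way a caller might derive `input_b` and `input_c` from conventional
batch-norm parameters is sketched below (a host-side, per-channel illustration;
the square-root and epsilon handling and all variable names are assumptions of
this sketch, not part of the API):
```C
#include <math.h>
#include <stdint.h>
// Sketch only: derive per-channel input_b and input_c values from
// conventional batch-norm parameters before they are transformed into
// zDNN tensors.
void precompute_batchnorm_inputs(const float *scale, const float *variance,
                                 const float *mean, const float *bias,
                                 float epsilon, uint32_t num_channels,
                                 float *b_vals, float *c_vals) {
  for (uint32_t ch = 0; ch < num_channels; ch++) {
    b_vals[ch] = scale[ch] / sqrtf(variance[ch] + epsilon); // becomes input_b
    c_vals[ch] = (-mean[ch]) * b_vals[ch] + bias[ch];       // becomes input_c
  }
}
```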
#### Format
```C
zdnn_status zdnn_batchnorm(const zdnn_ztensor *input_a,
const zdnn_ztensor *input_b,
const zdnn_ztensor *input_c, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Must be a 4D [ZDNN_NHWC](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Must be a 1D [ZDNN_1D](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_c`
- Must be a 1D [ZDNN_1D](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- A zdnn_ztensor of the same size as `input_a` representing the computed value
of the above formula
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Batchnorm]
[tensorflow batchnorm]:
https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization
[ONNX Batchnorm]
[onnx batchnorm]:
https://onnx.ai/onnx/operators/onnx__BatchNormalization.html#l-onnx-doc-batchnormalization
---
### zdnn_norm
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given input_a and input_b tensors in zDNN transformed format, produces the norm
of the difference of vectors. Calculation is performed as follows:
1. Each element in dimension 1 of input_b is subtracted from the corresponding
   element of input_a.
2. The difference is squared.
3. The sum of the squared differences for dimension 1 is computed.
4. The square root of the sum is placed in the first element of dimension 1 of
output tensor.
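In scalar form, the four steps above amount to a Euclidean norm of the
element-wise difference, as in this sketch (illustration only):
```C
#include <math.h>
#include <stdint.h>
// Sketch only: reference calculation for one dimension-1 vector of length n,
// where a and b hold the corresponding values from input_a and input_b.
float norm_ref(const float *a, const float *b, uint32_t n) {
  float sum = 0.0f;
  for (uint32_t i = 0; i < n; i++) {
    float diff = a[i] - b[i]; // step 1 (the sign is irrelevant once squared)
    sum += diff * diff;       // steps 2 and 3
  }
  return sqrtf(sum);          // step 4
}
```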
#### Format
```C
zdnn_status zdnn_norm(const zdnn_ztensor *input_a, zdnn_ztensor *input_b,
                      zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output`
- Tensor with the result of the normalization operation.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Normalization]
[tensorflow normalization]:
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Normalization
[ONNX Normalization]
N / A
---
### zdnn_moments
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given an input tensor in zDNN transformed format and a Bessel correction type,
produces the mean and variance for the respective input tensor.
#### Format
```C
zdnn_status zdnn_moments(const zdnn_ztensor *input,
zdnn_moments_bessel bessel_correction_type,
zdnn_ztensor *output_a, zdnn_ztensor *output_b);
```
#### Parameters
- `zdnn_ztensor *input`
- Must be a 4D [ZDNN_NHWC](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_moments_bessel bessel_correction_type`
- Bessel correction type to perform moments.
- `MOMENTS_BESSEL_POPULATION`
- `MOMENTS_BESSEL_SAMPLE`
- `zdnn_ztensor *output_a`
- The output tensor that will hold the mean.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *output_b`
- The output tensor that will hold the variance.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_BESSEL_CORRECTION`
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Programming Notes
- The `zdnn_moments` operation may be used in combination with the
  `zdnn_layernorm` operation. Please see [zdnn_layernorm](#zdnn_layernorm) for
  more guidance.
- When `MOMENTS_BESSEL_SAMPLE` is provided for the bessel correction type, all
provided input dimensions of the input tensor must not be equal to 1.
#### Framework Examples
[TensorFlow Moments]
[tensorflow moments]: https://www.tensorflow.org/api_docs/python/tf/nn/moments
[ONNX Moments]
N/A
---
### zdnn_layernorm
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given input_a, input_b, and input_c tensors in zDNN transformed format, produces
the layernorm of the given tensors. Calculation is performed as follows:
1. Each element in dimension 1 of input_b is subtracted from the corresponding
   element of input_a.
2. A corresponding element of input_c is added to epsilon.
3. The square root of the sum from step 2 is computed.
4. The difference from step 1 is divided by the result of step 3.
5. The quotient from step 4 is multiplied by gamma.
6. The product from step 5 is added to beta.
7. Result is stored in the corresponding element of output.
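In scalar form, steps 1 through 7 amount to the following sketch for a single
element (illustration only; the API operates on whole transformed tensors):
```C
#include <math.h>
// Sketch only: x is an element of input_a, m the corresponding mean from
// input_b, and v the corresponding variance from input_c.
float layernorm_ref(float x, float m, float v, float beta, float gamma,
                    float epsilon) {
  float diff = x - m;                   // step 1
  float denom = sqrtf(v + epsilon);     // steps 2 and 3
  return (diff / denom) * gamma + beta; // steps 4 through 7
}
```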
#### Format
```C
zdnn_status zdnn_layernorm(const zdnn_ztensor *input_a,
                           const zdnn_ztensor *input_b,
                           const zdnn_ztensor *input_c,
                           float beta, float gamma, float epsilon,
                           zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input_a`
- Must be a 4D [ZDNN_NHWC](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *input_b`
- Must be a 4D [ZDNN_NHWC](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- Contains arithmetic means ([Moments](#zdnn_moments) output_a)
- `zdnn_ztensor *input_c`
- Must be a 4D [ZDNN_NHWC](#common-layouts) tensor
- Must follow [general tensor requirements](#gen-zten-reqs)
- Contains arithmetic variances ([Moments](#zdnn_moments) output_b)
- `float beta`
- Final result adjustment addend.
- `float gamma`
- Final result adjustment multiplier.
- `float epsilon`
- Intermediate variance adjustment.
- `zdnn_ztensor *output`
- Must follow [general tensor requirements](#gen-zten-reqs)
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_BETA`
- `ZDNN_INVALID_GAMMA`
- `ZDNN_INVALID_EPSILON`
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Programming Notes
- `zdnn_layernorm` is intended to be used in combination with the `zdnn_moments`
  normalization operation. The `zdnn_moments` operation produces two output
  tensors containing the means and variances, respectively, of the
  dimension-4-index elements of the input tensor. The original input tensor to
  `zdnn_moments` is intended to be used as input-tensor 1 to `zdnn_layernorm`.
  The output-tensors 1 and 2 of `zdnn_moments` are intended to be used as
  input-tensor 2 and input-tensor 3 of the `zdnn_layernorm` operation, as shown
  in the sketch below.
- The beta and gamma values in the 4th and 5th parameters of `zdnn_layernorm`
  (also referred to as bias and gain) provide a learned scale and offset. The
  epsilon value in parameter 6 of `zdnn_layernorm` is intended to be a small
  value (for example, 0.001) to provide numerical stability.
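A sketch of that flow, assuming all tensors have already been created and
transformed, with placeholder names and epsilon:
```C
// Sketch only: feed the zdnn_moments outputs straight into zdnn_layernorm.
zdnn_status status =
    zdnn_moments(input, MOMENTS_BESSEL_POPULATION, mean_out, variance_out);
if (status == ZDNN_OK) {
  status = zdnn_layernorm(input, mean_out, variance_out, beta, gamma, 0.001f,
                          output);
}
```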
#### Framework Examples
[TensorFlow Layernorm](https://www.tensorflow.org/api_docs/python/tf/keras/layers/LayerNormalization)
[ONNX Layernorm](https://onnx.ai/onnx/operators/onnx__LayerNormalization.html#l-onnx-doc-layernormalization)
---
### zdnn_reduce
- [Back to Table of Contents](#TOC)
- [Back to Normalization Operations](#norm-ops)
#### Description
Given an input tensor in zDNN transformed format, produces an output tensor
where the given reduction operation is performed.
#### Format
```C
zdnn_status zdnn_reduce(const zdnn_ztensor *input, void *save_area,
zdnn_reduce_ops op_type, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with values to evaluate.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `void *save_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
- The preallocate memory must be at least 8K bytes in size, aligned on a 4k
boundary.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- `zdnn_reduce_ops op_type`
  - Reduction operation to perform on the input tensor.
  - `REDUCE_OP_MINIMUM`
  - `REDUCE_OP_MINIMUM_IDX`
  - `REDUCE_OP_MAXIMUM`
  - `REDUCE_OP_MAXIMUM_IDX`
- `zdnn_ztensor *output`
- Tensor that holds the reduction operation result of each value from `input`
  - Output dimension 1 must be 1
- Must follow [general tensor requirements](#gen-zten-reqs)
- Data Type must be as follows:
- (FP32, FP16, BFLOAT) when `op_type` is `REDUCE_OP_MINIMUM` or
`REDUCE_OP_MAXIMUM`.
- INT32 when `op_type` is `REDUCE_OP_MINIMUM_IDX` or `REDUCE_OP_MAXIMUM_IDX`
The output when op_type is `REDUCE_OP_MINIMUM` or `REDUCE_OP_MAXIMUM` can be
initialized using:
```C
zdnn_data_layouts input_layout = ZDNN_3DS;
zdnn_data_types input_type = FP32;
uint32_t dim4 = 4;
uint32_t dim2 = 5;
uint32_t dim1 = 6;
zdnn_tensor_desc input_pre_transformed_desc;
zdnn_init_pre_transformed_desc(input_layout, input_type,
&input_pre_transformed_desc, dim4, dim2, dim1);
zdnn_tensor_desc output_pre_transformed_desc;
zdnn_init_pre_transformed_desc(input_layout, input_type,
&output_pre_transformed_desc, dim4, dim2, 1);
```
The output when op_type is `REDUCE_OP_MINIMUM_IDX` or `REDUCE_OP_MAXIMUM_IDX`
can be initialized using:
```C
zdnn_data_layouts input_layout = ZDNN_3DS;
zdnn_data_types input_type = FP32;
uint32_t dim4 = 4;
uint32_t dim2 = 5;
uint32_t dim1 = 6;
zdnn_tensor_desc input_pre_transformed_desc;
zdnn_init_pre_transformed_desc(input_layout, input_type,
&input_pre_transformed_desc, dim4, dim2, dim1);
zdnn_data_types output_type = INT32;
zdnn_tensor_desc output_pre_transformed_desc;
zdnn_init_pre_transformed_desc(input_layout, output_type,
&output_pre_transformed_desc, dim4, dim2, 1);
```
#### Programming Notes
- If a nonnumeric element is encountered in a dimension-1 vector of input-tensor
  1, then (a) the resulting element in dimension 1 of output-tensor 1 is
  unpredictable, and (b) the range-violation status will be returned.
- When the reduction operation is `REDUCE_OP_MINIMUM_IDX`, the index of the
  first min value, from left to right, is returned when there are multiple
  elements with the same min value.
- When the reduction operation is `REDUCE_OP_MAXIMUM_IDX`, the index of the
  first max value, from left to right, is returned when there are multiple
  elements with the same max value.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_UNAVAILABLE_FUNCTION`
- `ZDNN_ALLOCATION_FAILURE` - A preallocated `save_area` was not specified and
internal allocation for the required memory failed.
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `op_type`.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Reduce Min and Max]
[tensorflow reduce minimum](https://www.tensorflow.org/api_docs/python/tf/math/reduce_min)
[tensorflow reduce maximum](https://www.tensorflow.org/api_docs/python/tf/math/reduce_max)
[ONNX Reduce Min and Max]
[onnx reduce minimum](https://onnx.ai/onnx/operators/onnx__ReduceMin.html#l-onnx-doc-reducemin)
[onnx reduce maximum](https://onnx.ai/onnx/operators/onnx__ReduceMax.html#l-onnx-doc-reducemax)
---
### zdnn_matmul_op
[Back to Table of Contents](#TOC)
#### Description
Given three input zDNN tensors `input_a`, `input_b`, and `input_c`, determine
the matrix multiplication of `input_a` \* `input_b` then perform one of the
following operations, using `input_c` against the dot product, storing the
result into the specified `output` zDNN tensor:
- Addition
- Compare - If dot product is greater than element.
- Compare - If dot product is greater or equal to element.
- Compare - If dot product is equal to element.
- Compare - If dot product is not equal to element.
- Compare - If dot product is less than or equal to element.
- Compare - If dot product is less than element.
For an operation type of addition, `input_c` is added to the intermediate dot
product. For operation types of comparison, the intermediate dot product is
compared to `input_c` and if the comparison is true, the result is set to a
value of 1; otherwise it is set to a value of 0.
The outermost dimension can optionally indicate that the inputs are stacks of
matrices. The results for each matrix stack are independent of the other stacks,
but all stacks are calculated in a single call.
#### Format
```C
zdnn_status zdnn_matmul_op(const zdnn_ztensor *input_a,
const zdnn_ztensor *input_b,
const zdnn_ztensor *input_c,
zdnn_matmul_ops op_type, zdnn_ztensor *output);
```
#### Input / Output matmul tensor requirements
- See table in this section for `pre_transformed_desc` and shape requirements
for each tensor.
- All tensors must either be stacked or unstacked.
- Must follow [general tensor requirements](#gen-zten-reqs)
| type | input_a | input_b | input_c | result |
| --------- | -------------------- | -------------------- | ----------------- | -------------------- |
| unstacked | `ZDNN_2D` (m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_2D` (m, p) |
| stacked | `ZDNN_3DS` (s, m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
#### Parameters
- `zdnn_ztensor *input_a`
- Input tensor with the first matrix for multiplication
- pre_transformed shape and layout must match
[matmul tensor requirements](#matmul-io-table)
- `zdnn_ztensor *input_b`
- Input tensor with the second matrix for multiplication
- pre_transformed shape and layout must match
[matmul tensor requirements](#matmul-io-table)
- `zdnn_ztensor *input_c`
- Input tensor that will have the requested operation performed against the
intermediate dot product of `input_a` and `input_b`.
- pre_transformed shape and layout must match
[matmul tensor requirements](#matmul-io-table)
- `zdnn_matmul_ops op_type`
- Operation to perform on dot product.
- `MATMUL_OP_ADDITION`
- `MATMUL_OP_GREATER`
- `MATMUL_OP_GREATER_EQUAL`
- `MATMUL_OP_EQUAL`
- `MATMUL_OP_NOT_EQUAL`
- `MATMUL_OP_LESSER_EQUAL`
- `MATMUL_OP_LESSER`
- `zdnn_ztensor *output`
- The output tensor which will hold the result of the operation in its buffer.
- pre_transformed shape and layout must match
[matmul tensor requirements](#matmul-io-table)
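As a sketch, the stacked row of the table above could be described with
pre-transformed descriptors like these (placeholder sizes; this assumes the
variadic descriptor-initialization form shown in the zdnn_reduce examples
earlier also applies to the 2D layouts):
```C
// Sketch only: stacked matmul shapes (s, m, n) x (s, n, p) with a (s, p)
// input_c and a (s, m, p) result.
uint32_t s = 2, m = 3, n = 4, p = 5;
zdnn_tensor_desc pre_a, pre_b, pre_c, pre_out;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_a, s, m, n);
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_b, s, n, p);
zdnn_init_pre_transformed_desc(ZDNN_2DS, FP32, &pre_c, s, p);
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_out, s, m, p);
```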
#### Programming Notes
- Care must be exercised when comparing values for equality or inequality since
  the order of operations and rounding may produce what appear to be slightly
  different values when they are essentially the same value.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `op_type`.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow MatMul](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/mat-mul)
[ONNX MatMul](https://onnx.ai/onnx/operators/onnx__MatMul.html#l-onnx-doc-matmul)
---
### zdnn_matmul_bcast_op
[Back to Table of Contents](#TOC)
#### Description
Given three input zDNN tensors `input_a`, `input_b`, and `input_c`, determine
the matrix multiplication of `input_a` \* `input_b`, then perform one of the
following operations, using `input_c` against the dot product, storing the
result into the specified `output` zDNN tensor:
- Addition
- Compare - If dot product is greater than element.
- Compare - If dot product is greater or equal to element.
- Compare - If dot product is equal to element.
- Compare - If dot product is not equal to element.
- Compare - If dot product is less than or equal to element.
- Compare - If dot product is less than element.
When an input is `ZDNN_3DS`, the outermost dimension for that input can
optionally indicate that the input is a stack of matrices. Likewise, when an
input is `ZDNN_2DS`, the outermost dimension for that input can optionally
indicate that the input is a stack of vectors.
For example, if `input_a` were `ZDNN_3DS`, each stack of `input_a` is multiplied
by the same `input_b` matrix and `input_c` vector which are broadcast over each
stack of `input_a`. Results for each stack are returned in the corresponding
stack index of `output`.
Likewise, if `input_b` were `ZDNN_3DS` and `input_c` were `ZDNN_2DS`, each stack
of `input_b` is multiplied by the same `input_a` matrix which is broadcast over
each stack of `input_b` and `input_c`. Results for each stack are returned in
the corresponding stack index of `output`.
#### Format
```C
zdnn_status zdnn_matmul_bcast_op(const zdnn_ztensor *input_a,
const zdnn_ztensor *input_b,
const zdnn_ztensor *input_c,
zdnn_matmul_bcast_ops op_type,
zdnn_ztensor *output);
```
#### Input / Output matmul broadcast tensor requirements
- See table in this section for `pre_transformed_desc` and shape requirements
for each tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
| type | input_a | input_b | input_c | result |
| --------- | -------------------- | -------------------- | ----------------- | -------------------- |
| unstacked | `ZDNN_2D` (m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_2D` (m, p) |
| stacked | `ZDNN_3DS` (s, m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast1 | `ZDNN_2D` (m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast23 | `ZDNN_3DS` (s, m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_3DS` (s, m, p) |
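For instance, the bcast23 row can be read as one shared `input_b` and `input_c`
applied to every stack of `input_a`; a descriptor sketch with placeholder sizes
(assuming the variadic descriptor-initialization form shown in the zdnn_reduce
examples earlier also applies to the 1D and 2D layouts):
```C
// Sketch only: bcast23 shapes, where the (n, p) input_b and (p) input_c are
// broadcast over each of the s stacks of input_a.
uint32_t s = 2, m = 3, n = 4, p = 5;
zdnn_tensor_desc pre_a, pre_b, pre_c, pre_out;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_a, s, m, n);
zdnn_init_pre_transformed_desc(ZDNN_2D, FP32, &pre_b, n, p);
zdnn_init_pre_transformed_desc(ZDNN_1D, FP32, &pre_c, p);
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &pre_out, s, m, p);
```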
#### Parameters
- `zdnn_ztensor *input_a`
- Input tensor with the first matrix for multiplication.
- pre_transformed shape and layout must match
[matmul broadcast tensor requirements](#matmul-bcast-io-table)
- `zdnn_ztensor *input_b`
- Input tensor with the second matrix for multiplication.
- pre_transformed shape and layout must match
[matmul broadcast tensor requirements](#matmul-bcast-io-table)
- `zdnn_ztensor *input_c`
- Input tensor that will have the requested operation performed against the
intermediate dot product for each "m" dimension in `output`.
- pre_transformed shape and layout must match
[matmul broadcast tensor requirements](#matmul-bcast-io-table)
- `zdnn_matmul_bcast_ops op_type`
- Operation to perform on dot product.
- `MATMUL_BCAST_OP_ADDITION`
- `MATMUL_BCAST_OP_GREATER`
- `MATMUL_BCAST_OP_GREATER_EQUAL`
- `MATMUL_BCAST_OP_EQUAL`
- `MATMUL_BCAST_OP_NOT_EQUAL`
- `MATMUL_BCAST_OP_LESSER_EQUAL`
- `MATMUL_BCAST_OP_LESSER`
- `zdnn_ztensor *output`
- The output tensor which will hold the result of the operation in its buffer.
- pre_transformed shape and layout must match
[matmul broadcast tensor requirements](#matmul-bcast-io-table)
#### Programming Notes
- When `NNPA_PARMBLKFORMAT_1` is not installed, `zdnn_matmul_bcast_ops` only
supports the `MATMUL_BCAST_OP_ADDITION` op_type.
If any other op_type is provided, `ZDNN_UNAVAILABLE_FUNCTION` is returned.
- `BCAST1` is not supported when `NNPA_PARMBLKFORMAT_1` is not installed and
will return `ZDNN_UNAVAILABLE_FUNCTION`.
- Care must be exercised when comparing values for equality or inequality since
the order of operations and rounding may produce what appear to be slightly
different values when they are essentially the same value.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_UNAVAILABLE_FUNCTION`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `op_type`.
- `ZDNN_FUNC_RC_F001` - Invalid input/output type or format combination.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime for the
following:
- `zdnn_data_layouts` specification
- bcast1
- `zdnn_matmul_bcast_ops` specifications:
- `MATMUL_BCAST_OP_GREATER`
- `MATMUL_BCAST_OP_GREATER_EQUAL`
- `MATMUL_BCAST_OP_EQUAL`
- `MATMUL_BCAST_OP_NOT_EQUAL`
- `MATMUL_BCAST_OP_LESSER_EQUAL`
- `MATMUL_BCAST_OP_LESSER`
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime.
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow MatMul](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/mat-mul)
[ONNX MatMul](https://onnx.ai/onnx/operators/onnx__MatMul.html#l-onnx-doc-matmul)
---
### zdnn_matmul_transpose_op
[Back to Table of Contents](#TOC)
#### Description
Given three input zDNN tensors `input_a`, `input_b`, and `input_c`, determine
the matrix multiplication of `input_a` \* `input_b` then perform one of the
following operations, using `input_c` against the dot product, storing the
result into the specified `output` zDNN tensor:
- Addition
- Compare - If dot product is greater than element.
- Compare - If dot product is greater or equal to element.
- Compare - If dot product is equal to element.
- Compare - If dot product is not equal to element.
- Compare - If dot product is less than or equal to element.
- Compare - If dot product is less than element.
For an operation type of addition, `input_c` is added to the intermediate dot
product. For operation types of comparison, the intermediate dot product is
compared to `input_c` and if the comparison is true, the result is set to a
value of 1; otherwise it is set to a value of 0.
The outermost dimension can optionally indicate that the inputs are stacks of
matrices. The results for each matrix stack are independent of the other stacks,
but all stacks are calculated in a single call.
#### Format
```C
zdnn_status zdnn_matmul_transpose_op(const zdnn_ztensor *input_a,
const zdnn_ztensor *input_b,
const zdnn_ztensor *input_c,
bool transpose_a, bool transpose_b,
zdnn_matmul_ops op_type,
zdnn_ztensor *output);
```
#### Input / Output matmul transpose tensor requirements
- See table in this section for `pre_transformed_desc` and shape requirements
for each tensor.
- All tensors must either be stacked or unstacked.
- Must follow [general tensor requirements](#gen-zten-reqs)
| type | input_a | input_b | input_c | result |
| --------- | -------------------- | -------------------- | ----------------- | -------------------- |
| unstacked | `ZDNN_2D` (m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_2D` (m, p) |
| stacked | `ZDNN_3DS` (s, m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast1 | `ZDNN_2D` (m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast23 | `ZDNN_3DS` (s, m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_3DS` (s, m, p) |
#### Parameters
- `zdnn_ztensor *input_a`
- Input tensor with the first matrix for multiplication
- pre_transformed shape and layout must match
[matmul transpose tensor requirements](#matmul-transpose-io-table)
- `zdnn_ztensor *input_b`
- Input tensor with the second matrix for multiplication
- pre_transformed shape and layout must match
[matmul transpose tensor requirements](#matmul-transpose-io-table)
- `zdnn_ztensor *input_c`
- Input tensor that will have the requested operation performed against the
intermediate dot product of `input_a` and `input_b`.
- pre_transformed shape and layout must match
[matmul transpose tensor requirements](#matmul-transpose-io-table)
- `bool transpose_a`
- Whether to transpose `input_a` prior to dot product.
- If `true`, `input_a` should have the unstacked dimensions (n, m) or stacked
dimensions (s, n, m)
- `bool transpose_b`
- Whether to transpose `input_b` prior to dot product.
- If `true`, `input_b` should have the unstacked dimensions (p, n) or stacked
dimensions (s, p, n)
- `zdnn_matmul_ops op_type`
- Operation to perform on dot product.
- `MATMUL_OP_ADDITION`
- `MATMUL_OP_GREATER`
- `MATMUL_OP_GREATER_EQUAL`
- `MATMUL_OP_EQUAL`
- `MATMUL_OP_NOT_EQUAL`
- `MATMUL_OP_LESSER_EQUAL`
- `MATMUL_OP_LESSER`
- `zdnn_ztensor *output`
- The output tensor which will hold the result of the operation in its buffer.
- pre_transformed shape and layout must match
[matmul transpose tensor requirements](#matmul-transpose-io-table)
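A minimal call sketch (already-transformed tensors assumed; here only
`input_a` is transposed prior to the dot product):
```C
#include <stdbool.h>
// Sketch only: compute transpose(input_a) * input_b, then add input_c.
zdnn_status status = zdnn_matmul_transpose_op(
    input_a, input_b, input_c, true, false, MATMUL_OP_ADDITION, output);
```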
#### Programming Notes
- `zdnn_matmul_transpose_op` is not supported when `NNPA_PARMBLKFORMAT_1` is not
installed and will return `ZDNN_UNAVAILABLE_FUNCTION`.
- Care must be exercised when comparing values for equality or inequality since
  the order of operations and rounding may produce what appear to be slightly
  different values when they are essentially the same value.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_UNAVAILABLE_FUNCTION`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `op_type`.
- `ZDNN_FUNC_RC_F001` - Invalid input/output type or format combination.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.0.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow MatMul](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/mat-mul)
[ONNX MatMul](https://onnx.ai/onnx/operators/onnx__MatMul.html#l-onnx-doc-matmul)
---
### zdnn_quantized_matmul_op
[Back to Table of Contents](#TOC)
#### Description
Given three input zDNN tensors `input_a`, `input_b`, and `input_c`, determine
the matrix multiplication of `input_a` \* `input_b` then perform one of the
following operations, using `input_c` against the dot product, storing the
result into the specified `output` zDNN tensor:
- Addition
- Compare - If dot product is greater than element.
- Compare - If dot product is greater or equal to element.
- Compare - If dot product is equal to element.
- Compare - If dot product is not equal to element.
- Compare - If dot product is less than or equal to element.
- Compare - If dot product is less than element.
For an operation type of addition, `input_c` is added to the intermediate dot
product. For operation types of comparison, the intermediate dot product is
compared to `input_c` and if the comparison is true, the result is set to a
value of 1; otherwise it is set to a value of 0.
The outermost dimension can optionally indicate that the inputs are stacks of
matrices. The results for each matrix stack are independent of the other stacks,
but all stacks are calculated in a single call.
When `dequantize` is `true`, the output will be dequantized after computation.
When `pre_computed` is `true`, the bias (`input_c`) is expected to already be
pre-computed. The pre-computed value of `input_c` for Addition can be achieved
using:
```C
Za = input_a->offset;
Sa = 1 / input_a->rec_scale;
Zb = input_b->offset;
Sb = 1 / input_b->rec_scale;
Zc = input_c->offset;
Sc = 1 / input_c->rec_scale;
Zy = output->offset;
Sy = 1 / output->rec_scale;
N = input_b->pre_transformed_desc->dim2;
pre_computed = Zy - (Sc/Sy) * Zc - (Sc/Sy) * input_c + ((Sa * Sb) / Sy) * N * Za * Zb;
```
The pre-computed value of `input_c` for Compare can be achieved using:
```C
Za = input_a->offset;
Sa = 1 / input_a->rec_scale;
Zb = input_b->offset;
Sb = 1 / input_b->rec_scale;
Zc = input_c->offset;
Sc = 1 / input_c->rec_scale;
pre_computed = Sc / (Sa * Sb) * (input_c - Zc) + Za * sum(input_b, axis=-2)
```
#### Format
```C
zdnn_status zdnn_quantized_matmul_op(const zdnn_ztensor *input_a,
const zdnn_ztensor *input_b,
const zdnn_ztensor *input_c,
zdnn_matmul_ops op_type,
const int8_t clip_min,
const int8_t clip_max,
const bool disable_clipping,
const bool dequantize,
const bool pre_computed,
void *work_area,
zdnn_ztensor *output);
```
#### Input / Output quantized matmul tensor requirements
- See table in this section for `pre_transformed_desc` and shape requirements
for each tensor.
- All tensors must either be stacked or unstacked.
- Must follow [general tensor requirements](#gen-zten-reqs)
- All tensors should use `zdnn_generate_quantized_transformed_desc` when
generating transformed descriptors, passing the appropriate
`zdnn_quantized_transform_types`.
- All quantized tensors should use `zdnn_init_quantized_ztensor` or
`zdnn_init_quantized_ztensor_with_malloc` when initializing, passing the
`scale` and `offset` quantization parameters.
- `scale` must be in range ([-DLFLT_MAX](#zdnn_get_max_limit) <= scale <=
[DLFLT_MAX](#zdnn_get_max_limit)) and scale != 0.
- `offset` must be in range ([-DLFLT_MAX](#zdnn_get_max_limit) <= offset <=
[DLFLT_MAX](#zdnn_get_max_limit)).
- All quantized input tensors should use `zdnn_transform_quantized_ztensor` when
transforming, passing the `clip_min` and `clip_max` quantization parameters.
##### zdnn_data_layouts
| type | input_a | input_b | input_c | result |
| --------- | -------------------- | -------------------- | ----------------- | -------------------- |
| unstacked | `ZDNN_2D` (m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_2D` (m, p) |
| stacked | `ZDNN_3DS` (s, m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast1 | `ZDNN_2D` (m, n) | `ZDNN_3DS` (s, n, p) | `ZDNN_2DS` (s, p) | `ZDNN_3DS` (s, m, p) |
| bcast23 | `ZDNN_3DS` (s, m, n) | `ZDNN_2D` (n, p) | `ZDNN_1D` (p) | `ZDNN_3DS` (s, m, p) |
##### zdnn_quantized_transform_types
| type | input_a | input_b | input_c | result |
| ---------- | ------------------- | ---------------------- | -------------- | ------------------- |
| normal | QUANTIZED_INT8 | QUANTIZED_WEIGHTS_INT8 | QUANTIZED_INT8 | QUANTIZED_DLFLOAT16 |
| on-the-fly | QUANTIZED_DLFLOAT16 | QUANTIZED_WEIGHTS_INT8 | QUANTIZED_INT8 | QUANTIZED_DLFLOAT16 |
#### Parameters
- `zdnn_ztensor *input_a`
- Input tensor with the first matrix for multiplication
- pre_transformed shape and layout must match
[quantized matmul tensor requirements](#quantized-matmul-io-table)
- `zdnn_ztensor *input_b`
- Input tensor with the second matrix for multiplication
- pre_transformed shape and layout must match
[quantized matmul tensor requirements](#quantized-matmul-io-table)
- `zdnn_ztensor *input_c`
- Input tensor that will have the requested operation performed against the
intermediate dot product of `input_a` and `input_b`.
- pre_transformed shape and layout must match
[quantized matmul tensor requirements](#quantized-matmul-io-table)
- `int8_t clip_min`
- Minimum quantized value for `input_a` prior to dot product.
- Only applicable when performing `on-the-fly` quantization.
- Must be less than `clip_max`.
- `int8_t clip_max`
- Maximum quantized value for `input_a` prior to dot product.
- Only applicable when performing `on-the-fly` quantization.
- Must be greater than `clip_min`.
- `bool disable_clipping`
- When `true` disables clipping and rounding.
- `bool dequantize`
- Whether to dequantize returned ztensor.
- `bool pre_computed`
- Whether bias is already pre-computed.
- `void *work_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- Amount of required storage is the same as `input_c->buffer_size`.
- The start of the buffer must be 4k aligned.
- `zdnn_matmul_ops op_type`
- Operation to perform on dot product.
- `MATMUL_OP_ADDITION`
- `MATMUL_OP_GREATER`
- `MATMUL_OP_GREATER_EQUAL`
- `MATMUL_OP_EQUAL`
- `MATMUL_OP_NOT_EQUAL`
- `MATMUL_OP_LESSER_EQUAL`
- `MATMUL_OP_LESSER`
- `zdnn_ztensor *output`
- The output tensor which will hold the result of the operation in its buffer.
- pre_transformed shape and layout must match
[quantized matmul tensor requirements](#quantized-matmul-io-table)
#### Programming Notes
- `zdnn_quantized_matmul_op` is not supported when `NNPA_PARMBLKFORMAT_1` is not
installed and will return `ZDNN_UNAVAILABLE_FUNCTION`.
- Care must be exercised when comparing values for equality or inequality since
  the order of operations and rounding may produce what appear to be slightly
  different values when they are essentially the same value.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- `ZDNN_INVALID_TYPE`: [Quantized zTensor Requirements](#quan-zten-reqs)
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_SCALE`
- `ZDNN_INVALID_OFFSET`
- `ZDNN_INVALID_CLIPPING_VALUE`
- `ZDNN_UNAVAILABLE_FUNCTION`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `op_type`.
- `ZDNN_FUNC_RC_F001` - Invalid input/output type or format combination.
- `ZDNN_FUNC_RC_F002` - Invalid input/output scale.
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Quantized MatMul](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/quantized-mat-mul)
[ONNX Quantize Linear](https://onnx.ai/onnx/operators/onnx__QuantizeLinear.html#l-onnx-doc-quantizelinear)
---
### zdnn_lstm
[Back to Table of Contents](#TOC)
#### Description
Implements Long-Short Term Memory layer (LSTM - Hochreiter 1997).
The following formula is computed for the input tensor input(t) for all time
steps:
(Default: f=Sigmoid, g=Tanh, h=Tanh):
```C
- it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi)
- ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Wbf + Rbf)
- ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
- Ct = ft (.) Ct-1 + it (.) ct
- ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Wbo + Rbo)
- Ht = ot (.) h(Ct)
```
#### Format
```C
zdnn_status zdnn_lstm(const zdnn_ztensor *input, const zdnn_ztensor *h0,
const zdnn_ztensor *c0, const zdnn_ztensor *weights,
const zdnn_ztensor *biases,
const zdnn_ztensor *hidden_weights,
const zdnn_ztensor *hidden_biases,
lstm_gru_direction direction, void *work_area,
zdnn_ztensor *hn_output, zdnn_ztensor *cf_output);
```
Also see an [example](#example-of-an-application-calling-the-zdnn_lstm-api) in
the usage example section.
#### LSTM Input / Output requirements
- `num_hidden` dimensions:
- Any num_hidden dimension must be less than or equal to
`zdnn_get_max_for_dim(2) / 4` elements.
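A small sketch of checking that limit before building the concatenated tensors
(`num_hidden` is a placeholder variable):
```C
// Sketch only: enforce the num_hidden limit stated above; the divide-by-4
// reflects the four concatenated FICO gates.
if (num_hidden > zdnn_get_max_for_dim(2) / 4) {
  // num_hidden is too large for the LSTM concatenated tensors
}
```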
#### Parameters
- `zdnn_ztensor *input`
- Input must be a tensor with the shape (num_timesteps, num_batches,
num_features) prior to transformation with the `zdnn_transform_ztensor` API.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *h0`
- Tensor containing the initial hidden state with shape (num_dirs,
num_batches, num_hidden) prior to transformation with the
`zdnn_transform_ztensor` API.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `zdnn_ztensor *c0`
- Tensor containing the initial cell state with shape (num_dirs, num_batches,
num_hidden) prior to transformation with the `zdnn_transform_ztensor` API.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `zdnn_ztensor *weights`
- Tensor containing the concatenated input connection weights in Forget,
Input, Cell, Output (FICO) order.
- Prior to transformation, each gate needs to be transposed to shape
(num_dirs, num_features, num_hidden) by the caller.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_LSTM`
- `USAGE_WEIGHTS`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `zdnn_ztensor *biases`
- Tensor containing the concatenated input connection bias in Forget, Input,
Cell, Output (FICO) order.
  - Prior to transformation, each gate is expected to have shape (num_dirs,
    num_hidden).
- Expects `pre_transformed_desc->layout` to be `ZDNN_2DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_LSTM`
- `USAGE_BIASES`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `zdnn_ztensor *hidden_weights`
- Tensor containing the concatenated hidden connection weights in Forget,
Input, Cell, Output (FICO) order.
- Prior to transformation, each gate needs to be transposed to shape
(num_dirs, num_hidden, num_hidden) by the caller.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_LSTM`
- `USAGE_HIDDEN_WEIGHTS`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `zdnn_ztensor *hidden_biases`
- Tensor containing the concatenated hidden connection bias in Forget, Input,
Cell, Output (FICO) order.
- Prior to transformation, each gate is expected to have shape
  (num_dirs, num_hidden).
- Expects `pre_transformed_desc->layout` to be `ZDNN_2DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_LSTM`
- `USAGE_HIDDEN_BIASES`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- `lstm_gru_direction direction`
- Direction indicator of `lstm_gru_direction` type. Valid values:
- `FWD` (forward)
- `BWD` (backward)
- `BIDIR` (bi-directional).
- For input and output shapes, the num_dirs dimension should be:
- `1` for unidirectional calls such as FWD or BWD
- `2` for bidirectional calls such that:
- dimension 0 contains FWD values.
- dimension 1 contains BWD values.
- `void *work_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- Amount of required storage can be determined given the LSTM timestep, batch,
and num_hidden values.
- The sample code below creates a ztensor descriptor whose transformed size
  matches the required `work_area` size. To use this sample code yourself,
  replace the `num_timesteps`, `num_batches`, and `num_hidden` variables with
  your own values.
```C
zdnn_tensor_desc desc;
desc.dim4 = (4 * num_timesteps) + 6;
desc.dim3 = 1;
desc.dim2 = num_batches;
desc.dim1 = num_hidden;
uint64_t work_area_size = zdnn_getsize_ztensor(&desc);
```
- For bidirectional, twice the amount of contiguous storage is required.
- The start of the buffer must be 4k aligned (see the allocation sketch after
  this parameter list).
- `zdnn_ztensor *hn_output`
- Output results of the hidden states
- Expects `pre_transformed_desc->layout` to be `ZDNN_4DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- Output pre-transformed shapes:
- all timesteps: (num_timesteps, num_dirs, num_batches, num_hidden)
- final timestep only: (1, num_dirs, num_batches, num_hidden)
- For bidirectional (`BIDIR`) output:
- Forward and backward results are concatenated on the innermost dimension.
- Can be used directly as input for subsequent RNN layers without needing
untransformation.
- Can not be used directly as input for other non-RNN zDNN ops.
- Untransformation is supported.
- Note that for `BWD` and the backward component of `BIDIR` directions, the
output order matches the order of the input, not the processing order. For
example, the first input timestep is the last to be processed and its result
is the first timestep of the output.
- `zdnn_ztensor *cf_output`
- Output results of the cell state for the last processed timestep
- Expects `pre_transformed_desc->layout` to be `ZDNN_4DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#lstm-hid_sz)
- Output pre-transformed shapes:
- (1, num_dirs, num_batches, num_hidden)
- For bidirectional (`BIDIR`):
- Forward and backward results are concatenated on the innermost dimension.
- Can not be used directly as input for other non-RNN zDNN ops.
- Untransformation is supported.
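As referenced in the `work_area` parameter above, the following is a minimal sketch (not part of the zDNN API) of allocating a caller-managed, 4k-aligned `work_area`; it uses the POSIX `posix_memalign` call and example sizes that you would replace with your own.

```C
// Hedged sketch: caller-managed, 4k-aligned work_area for zdnn_lstm.
// Example values; replace with your model's sizes.
uint32_t num_timesteps = 8, num_batches = 4, num_hidden = 64;

zdnn_tensor_desc desc;
desc.dim4 = (4 * num_timesteps) + 6; // per the sizing sample above
desc.dim3 = 1;
desc.dim2 = num_batches;
desc.dim1 = num_hidden;

uint64_t work_area_size = zdnn_getsize_ztensor(&desc);
lstm_gru_direction direction = FWD;
if (direction == BIDIR) {
  work_area_size *= 2; // bidirectional needs twice the contiguous storage
}

void *work_area = NULL;
if (posix_memalign(&work_area, 4096, work_area_size) != 0) {
  work_area = NULL; // let zdnn_lstm allocate internally instead
}

// ... zdnn_lstm(..., direction, work_area, &hn_output, &cf_output) ...

free(work_area); // safe even when work_area is NULL
```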
#### Summary
| | pre-transformed layout | pre-transformed shape |
| -------------- | ---------------------- | --------------------------------------------------------------------------------------------------- |
| input | `ZDNN_3DS` | (num_timesteps, num_batches, num_features) |
| h0 | `ZDNN_3DS` | (num_dirs, num_batches, num_hidden) |
| c0 | `ZDNN_3DS` | (num_dirs, num_batches, num_hidden) |
| weights | `ZDNN_3DS` | (num_dirs, num_features, num_hidden) |
| bias | `ZDNN_2DS` | (num_dirs, num_hidden) |
| hidden_weights | `ZDNN_3DS` | (num_dirs, num_hidden, num_hidden) |
| hidden_biases | `ZDNN_2DS` | (num_dirs, num_hidden) |
| hn_output      | `ZDNN_4DS`             | (num_timesteps, num_dirs, num_batches, num_hidden) (last timestep only when `num_timesteps` = 1) |
| cf_output | `ZDNN_4DS` | (1, num_dirs, num_batches, num_hidden) |
| | create transformed descriptor via |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| input | `zdnn_generate_transformed_desc` |
| h0 | `zdnn_generate_transformed_desc` |
| c0 | `zdnn_generate_transformed_desc` |
| weights        | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_LSTM` + `USAGE_WEIGHTS` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`        |
| bias           | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_LSTM` + `USAGE_BIASES` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`         |
| hidden_weights | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_LSTM` + `USAGE_HIDDEN_WEIGHTS` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR` |
| hidden_biases  | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_LSTM` + `USAGE_HIDDEN_BIASES` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`  |
| hn_output | `zdnn_generate_transformed_desc` |
| cf_output | `zdnn_generate_transformed_desc` |
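As a hedged illustration of the descriptor table above, the snippet below builds the transformed descriptor for the concatenated `weights` tensor of a first-layer LSTM. It assumes `zdnn_generate_transformed_desc_concatenated` takes the pre-transformed descriptor, the combined `zdnn_concat_info` flags, and the output descriptor, and uses example sizes you would replace.

```C
// Hedged sketch: transformed descriptor for the concatenated LSTM weights tensor.
// Example values; replace with your model's sizes.
uint32_t num_dirs = 1, num_features = 32, num_hidden = 64;

zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;

// Pre-transformed layout ZDNN_3DS, shape (num_dirs, num_features, num_hidden).
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &weights_pre_tfrmd_desc,
                               num_dirs, num_features, num_hidden);

// Flags per the table above; PREV_LAYER_NONE assumes a first RNN layer.
zdnn_status status = zdnn_generate_transformed_desc_concatenated(
    &weights_pre_tfrmd_desc,
    RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
    &weights_tfrmd_desc);
```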
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_SHAPE` - (if any of the following are not true)
- `hn_output` timesteps dimension must be 1 or the same size as `input`
timestep dimension.
- All tensors with a direction dimension have the same direction dimension
size.
- `input` timestep dimension must be greater than or equal to 1.
- Other general shape violations (exceeds MDIS, etc.)
- `ZDNN_INVALID_DIRECTION` - `direction` parameter was not a recognized
`lstm_gru_direction`.
- `ZDNN_ALLOCATION_FAILURE` - A preallocated `work_area` was not specified and
internal allocation for the required memory failed.
- [hardware statuses](#hw-statuses)
#### Since
1.1.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow LSTM](https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell)
[ONNX LSTM](https://onnx.ai/onnx/operators/onnx__LSTM.html#l-onnx-doc-lstm)
---
### zdnn_gru
[Back to Table of Contents](#TOC)
#### Description
Implements Gated Recurrent Unit (Kyunghyun Cho 2014). Supports only reset after
linear.
The following formula is computed for the input tensor input(t) for all time
steps:
```C
(Default: f=Sigmoid, g=Tanh):
- zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)
- rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)
- ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh)
- Ht = (1 - zt) (.) ht + zt (.) Ht-1
```
#### Format
```C
zdnn_status zdnn_gru(const zdnn_ztensor *input, const zdnn_ztensor *h0,
const zdnn_ztensor *weights, const zdnn_ztensor *biases,
const zdnn_ztensor *hidden_weights,
const zdnn_ztensor *hidden_biases,
lstm_gru_direction direction, void *work_area,
zdnn_ztensor *hn_output);
```
Also see an [example](#example-of-an-application-calling-the-zdnn_gru-api) in
the usage example section.
#### GRU Input / Output requirements
- `num_hidden` dimensions:
- Any num_hidden dimension must be less than or equal to
`zdnn_get_max_for_dim(2) / 3` elements.
#### Parameters
- `zdnn_ztensor *input`
- Input must be a tensor with the shape (num_timesteps, num_batches,
num_features) prior to transformation with the `zdnn_transform_ztensor` API.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *h0`
- Tensor containing the initial hidden state with shape (num_dirs,
num_batches, num_hidden) prior to transformation with the
`zdnn_transform_ztensor` API.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- `zdnn_ztensor *weights`
- Tensor containing the concatenated input connection weights in (Z)update,
Reset, Hidden, (ZRH) order.
- Prior to transformation, each gate needs to be transposed to shape
(num_dirs, num_features, num_hidden) by the caller.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_GRU`
- `USAGE_WEIGHTS`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- `zdnn_ztensor *biases`
- Tensor containing the concatenated input connection bias in (Z)update,
Reset, Hidden, (ZRH) order.
- Prior to transformation, each gate is expected to have shape
  (num_dirs, num_hidden).
- Expects `pre_transformed_desc->layout` to be `ZDNN_2DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_GRU`
- `USAGE_BIASES`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- `zdnn_ztensor *hidden_weights`
- Tensor containing the concatenated hidden connection weights in (Z)update,
Reset, Hidden, (ZRH) order.
- Prior to transformation, each gate needs to be transposed to shape
(num_dirs, num_hidden, num_hidden) by the caller.
- Expects `pre_transformed_desc->layout` to be `ZDNN_3DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_GRU`
- `USAGE_HIDDEN_WEIGHTS`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- `zdnn_ztensor *hidden_biases`
- Tensor containing the concatenated hidden connection bias in (Z)update,
Reset, Hidden, (ZRH) order.
- Prior to transformation, each gate is expected to have shape
  (num_dirs, num_hidden).
- Expects `pre_transformed_desc->layout` to be `ZDNN_2DS`.
- Expects `zdnn_concat_info` having the following flags turned on:
- `RNN_TYPE_GRU`
- `USAGE_HIDDEN_BIASES`
- Appropriate `PREV_LAYER` flag:
- `PREV_LAYER_NONE` if `input` tensor is not from a previous RNN layer
- `PREV_LAYER_UNI` if `input` tensor is uni-directional output from a
previous RNN layer
- `PREV_LAYER_BIDIR` if `input` tensor is bi-directional output from a
previous RNN layer
- Must follow [concatenated tensor requirements](#concat-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- `lstm_gru_direction direction`
- Direction indicator of `lstm_gru_direction` type. Valid values:
- `FWD` (forward)
- `BWD` (backward)
- `BIDIR` (bi-directional).
- For input shapes, the num_dirs dimension should be:
- `1` for unidirectional calls such as FWD or BWD
- `2` for bidirectional calls such that:
- dimension 0 contains FWD values.
- dimension 1 contains BWD values.
- `void *work_area`
- A preallocated memory address to use for temporary storage during internal
operation processing.
- If set to NULL, the operation will determine, allocate and free storage
automatically.
- Amount of required storage can be determined given the GRU timestep, batch,
and num_hidden values.
- The sample code below creates a ztensor descriptor whose transformed size
  matches the required `work_area` size. To use this sample code yourself,
  replace the `num_timesteps`, `num_batches`, and `num_hidden` variables with
  your own values.
```C
zdnn_tensor_desc desc;
desc.dim4 = (3 * num_timesteps) + 5;
desc.dim3 = 1;
desc.dim2 = num_batches;
desc.dim1 = num_hidden;
uint64_t work_area_size = zdnn_getsize_ztensor(&desc);
```
- For bidirectional, twice the amount of contiguous storage is required.
- The start of the buffer must be 4k aligned.
- `zdnn_ztensor *hn_output`
- Output results of the hidden states
- Expects `pre_transformed_desc->layout` to be `ZDNN_4DS`.
- Must follow [general tensor requirements](#gen-zten-reqs)
- Must follow [num_hidden requirements](#gru-hid_sz)
- Output pre-transformed shapes:
- all timesteps: (num_timesteps, num_dirs, num_batches, num_hidden)
- final timestep only: (1, num_dirs, num_batches, num_hidden)
- For bidirectional (`BIDIR`) output:
- Forward and backward results are concatenated on the innermost dimension.
- Can be used directly as input for subsequent RNN layers without needing
untransformation.
- Can not be used directly as input for other non-RNN zDNN ops.
- Untransformation is supported.
- Note that for `BWD` and the backward component of `BIDIR` directions, the
output order matches the order of the input, not the processing order. For
example, the first input timestep is the last to be processed and its result
is the first timestep of the output.
#### Summary
| | pre-transformed layout | pre-transformed shape |
| -------------- | ---------------------- | --------------------------------------------------------------------------------------------------- |
| input | `ZDNN_3DS` | (num_timesteps, num_batches, num_features) |
| h0 | `ZDNN_3DS` | (num_dirs, num_batches, num_hidden) |
| weights | `ZDNN_3DS` | (num_dirs, num_features, num_hidden) |
| bias | `ZDNN_2DS` | (num_dirs, num_hidden) |
| hidden_weights | `ZDNN_3DS` | (num_dirs, num_hidden, num_hidden) |
| hidden_biases | `ZDNN_2DS` | (num_dirs, num_hidden) |
| hn_output      | `ZDNN_4DS`             | (num_timesteps, num_dirs, num_batches, num_hidden) (last timestep only when `num_timesteps` = 1) |
| | create transformed descriptor via |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| input | `zdnn_generate_transformed_desc` |
| h0 | `zdnn_generate_transformed_desc` |
| weights        | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_GRU` + `USAGE_WEIGHTS` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`        |
| bias           | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_GRU` + `USAGE_BIASES` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`         |
| hidden_weights | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_GRU` + `USAGE_HIDDEN_WEIGHTS` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR` |
| hidden_biases  | `zdnn_generate_transformed_desc_concatenated` - `RNN_TYPE_GRU` + `USAGE_HIDDEN_BIASES` + one of the following: `PREV_LAYER_NONE`/`PREV_LAYER_UNI`/`PREV_LAYER_BIDIR`  |
| hn_output | `zdnn_generate_transformed_desc` |
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_SHAPE` - (if any of the following are not true)
- `hn_output` timesteps dimension must be 1 or the same size as `input`
timestep dimension.
- All tensors with a direction dimension have the same direction dimension
size.
- `input` timestep dimension must be greater than or equal to 1.
- Other general shape violations (exceeds MDIS, etc.)
- `ZDNN_INVALID_DIRECTION` - `direction` parameter was not a recognized
`lstm_gru_direction`.
- `ZDNN_ALLOCATION_FAILURE` - A preallocated `work_area` was not specified and
internal allocation for the required memory failed.
- [hardware statuses](#hw-statuses)
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow GRU](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRUCell)
[ONNX GRU](https://onnx.ai/onnx/operators/onnx__GRU.html#l-onnx-doc-gru)
---
### zdnn_avgpool2d
[Back to Table of Contents](#TOC)
#### Description
Given an input tensor in zDNN transformed format, padding type, kernel size and
kernel stride, produces a downsampled tensor reducing the middle dimensions
based on the mean values within the kernel window at each step and stores the
results into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_avgpool2d(const zdnn_ztensor *input,
zdnn_pool_padding padding_type,
uint32_t kernel_height, uint32_t kernel_width,
uint32_t stride_height, uint32_t stride_width,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with original values to be downsampled in the output tensor.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[batch_Num, Height, Width, Channel].
- See [Parameter Restrictions](#avgpool2d-parm-restrictions) below for
information on the expected shape of the input tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `padding_type`
- The type of padding to use for the pooling operations.
- Valid values are `SAME_PADDING` or `VALID_PADDING`.
- See [Parameter Restrictions](#avgpool2d-parm-restrictions) below for
information on the expected value of padding_type.
- For information on "same" vs "valid" padding see:
.
- `kernel_height`
- Size of the kernel window that passes over the input's height dimension.
- See [Parameter Restrictions](#avgpool2d-parm-restrictions) below for
information on the expected value of kernel_height.
- `kernel_width`
- Size of the kernel window that passes over the input's width dimension.
- See [Parameter Restrictions](#avgpool2d-parm-restrictions) below for
information on the expected value of kernel_width.
- `stride_height`
- Number of positions the kernel moves over input's height dimension at each
step.
- If `stride_height` is 0 then `stride_width` must also be 0.
- If strides are greater than 0 then `stride_height` must be less than or
equal to 30.
- `stride_width`
- Number of positions the kernel moves over the input's width dimension at
each step.
- If `stride_height` is 0 then `stride_width` must also be 0.
- If strides are greater than 0 then `stride_width` must be less than or equal
to 30.
- `zdnn_ztensor *output`
- The result tensor that will hold the result of the pooling operation in its
  buffer.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[batch_Num, Height, Width, Channel].
- See [Parameter Restrictions](#avgpool2d-parm-restrictions) below for
information on the expected shape of the output tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### AvgPool2D Parameter Restrictions
Parameter restrictions may vary based on provided strides and padding_type.
- Input tensor batch_Num and Channel dimensions must always match the output
tensor's respective dimensions.
- If strides are 0:
- Both input tensor's Height dimension and the kernel_height must match and be
less than or equal to 1024.
- Both input tensor's Width dimension and the kernel_width must match and be
less than or equal to 1024.
- Output tensor's height and width dimensions must be 1.
- padding_type must be `VALID_PADDING`.
- If strides are greater than zero:
- kernel_width and kernel_height must be less than or equal to 64.
- input tensor's height or width dimension must not be greater than 1024.
- If padding_type is `SAME_PADDING`:
- Output tensor's height dimension must equal
`ceil((float)input's height / stride_height)`.
- Output tensor's width dimension must equal
`ceil((float)input's width / stride_width)`.
- If padding_type is `VALID_PADDING`:
- Output tensor's height dimension must equal
`ceil((float)(input's height - kernel_height + 1) / stride_height)`.
- Output tensor's width dimension must equal
`ceil((float)(input's width - kernel_width + 1) / stride_width)`.
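As a worked example of these formulas (illustrative values only, not from the library), the sketch below computes the output height the operation expects for each padding type; it mirrors the `ceil` expressions above.

```C
#include <math.h>

// Hedged sketch: expected AvgPool2D output height for each padding type.
uint32_t height_in = 32, kernel_height = 3, stride_height = 2;

// SAME_PADDING:  ceil(height_in / stride_height)                        -> 16
uint32_t same_height_out =
    (uint32_t)ceilf((float)height_in / stride_height);

// VALID_PADDING: ceil((height_in - kernel_height + 1) / stride_height)  -> 15
uint32_t valid_height_out =
    (uint32_t)ceilf((float)(height_in - kernel_height + 1) / stride_height);
```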
#### Programming Notes
- If the magnitude of difference between elements of `input` is large (greater
than 10), accuracy may be reduced.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- Shape of input or output tensor is invalid based on given kernel and stride
parameters
- Other general shape violations (exceeds MDIS, etc.)
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_STRIDE_PADDING`
- `ZDNN_INVALID_STRIDES` - One stride was non-zero, but not the other.
- [hardware statuses](#hw-statuses)
- `ZDNN_EXCEEDS_MDIS` will also occur if any of the following conditions
occur:
- stride_height is larger than `zdnn_get_max_for_dim(3)`.
- stride_width is larger than `zdnn_get_max_for_dim(2)`.
- kernel_height is 0 or is larger than `zdnn_get_max_for_dim(3)`.
- kernel_width is 0 or is larger than `zdnn_get_max_for_dim(2)`.
- `ZDNN_FUNC_RC_F000` - Invalid `padding_type`
- `ZDNN_FUNC_RC_F001` - `stride_height` = 0 and `stride_width` = 0, but a
kernel parameter is greater than allowed (see `kernel_height` or
`kernel_width` above)
- `ZDNN_FUNC_RC_F002` - `stride_height` > 0 and `stride_width` > 0, but a
kernel parameter is greater than allowed (see `kernel_height` or
`kernel_width` above)
- `ZDNN_FUNC_RC_F003` - `stride_height` > 0 and `stride_width` > 0, but a
stride parameter is greater than allowed (see `stride_height` or
`stride_width` above)
- `ZDNN_FUNC_RC_F004` - `stride_height` > 0 and `stride_width` > 0, but either
input tensor's height or width dimension is greater than 1024.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow AvgPool](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/avg-pool)
[ONNX AvgPool](https://onnx.ai/onnx/operators/onnx__AveragePool.html#l-onnx-doc-averagepool)
---
### zdnn_maxpool2d
[Back to Table of Contents](#TOC)
#### Description
Given an input tensor in zDNN transformed format, padding type, kernel size and
kernel stride, produces a downsampled tensor reducing the middle dimensions
based on the maximum values within the kernel window at each step and stores the
results into the provided output zDNN tensor.
#### Format
```C
zdnn_status zdnn_maxpool2d(const zdnn_ztensor *input,
zdnn_pool_padding padding_type,
uint32_t kernel_height, uint32_t kernel_width,
uint32_t stride_height, uint32_t stride_width,
zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with original values to be downsampled in the output tensor.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[batch_Num, Height, Width, Channel].
- See [Parameter Restrictions](#maxpool2d-parm-restrictions) below for
information on the expected shape of the input tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `padding_type`
- The type of padding to use for the pooling operations.
- Valid values are `SAME_PADDING` or `VALID_PADDING`.
- See [Parameter Restrictions](#maxpool2d-parm-restrictions) below for
information on the expected value of padding_type.
- For information on "same" vs "valid" padding see:
.
- `kernel_height`
- Size of the kernel window that passes over the input's height dimension.
- See [Parameter Restrictions](#maxpool2d-parm-restrictions) below for
information on the expected value of kernel_height.
- `kernel_width`
- Size of the kernel window that passes over the input's width dimension.
- See [Parameter Restrictions](#maxpool2d-parm-restrictions) below for
information on the expected value of kernel_width.
- `stride_height`
- Number of positions the kernel moves over input's height dimension at each
step.
- If `stride_height` is 0 then `stride_width` must also be 0.
- If strides are greater than 0 then `stride_height` must be less than or
equal to 30.
- `stride_width`
- Number of positions the kernel moves over the input's width dimension at
each step.
- If `stride_height` is 0 then `stride_width` must also be 0.
- If strides are greater than 0 then `stride_width` must be less than or equal
to 30.
- `zdnn_ztensor *output`
- The result tensor that will hold the result of the pooling operation in its
  buffer.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[batch_Num, Height, Width, Channel].
- See [Parameter Restrictions](#maxpool2d-parm-restrictions) below for
information on the expected shape of the output tensor.
- Must follow [general tensor requirements](#gen-zten-reqs)
#### MaxPool2D Parameter Restrictions
Parameter restrictions may vary based on provided strides and padding_type.
- Input tensor batch_Num and Channel dimensions must always match the output
tensor's respective dimensions.
- If strides are 0:
- Both input tensor's Height dimension and the kernel_height must match and be
less than or equal to 1024.
- Both input tensor's Width dimension and the kernel_width must match and be
less than or equal to 1024.
- Output tensor's height and width dimensions must be 1.
- padding_type must be `VALID_PADDING`.
- If strides are greater than zero:
- kernel_width and kernel_height must be less than or equal to 64.
- input tensor's height or width dimension must not be greater than 1024.
- If padding_type is `SAME_PADDING`:
- Output tensor's height dimension must equal
`ceil((float)input's height / stride_height)`.
- Output tensor's width dimension must equal
`ceil((float)input's width / stride_width)`.
- If padding_type is `VALID_PADDING`:
- Output tensor's height dimension must equal
`ceil((float)(input's height - kernel_height + 1) / stride_height)`.
- Output tensor's width dimension must equal
`ceil((float)(input's width - kernel_width + 1) / stride_width)`.
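Putting the format and restrictions together, here is a minimal, hedged call sketch; it assumes `input` and `output` are already-initialized and transformed `zdnn_ztensor`s whose shapes satisfy the restrictions above.

```C
// Hedged sketch: 2x2 max pooling with stride 2 and VALID padding.
zdnn_status status =
    zdnn_maxpool2d(&input, VALID_PADDING, 2, 2, 2, 2, &output);
if (status != ZDNN_OK) {
  // handle error (see the Returns section below)
}
```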
#### Programming Notes
- If the magnitude of difference between elements of `input` is large (greater
than 10), accuracy may be reduced.
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- `ZDNN_INVALID_SHAPE`
- Shape of input or output tensor is invalid based on given kernel and stride
parameters
- Other general shape violations (exceeds MDIS, etc.)
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_STRIDE_PADDING`
- `ZDNN_INVALID_STRIDES` - One stride was non-zero, but not the other.
- [hardware statuses](#hw-statuses)
- `ZDNN_EXCEEDS_MDIS` will also occur if any of the following conditions
occur:
- stride_height is larger than `zdnn_get_max_for_dim(3)`.
- stride_width is larger than `zdnn_get_max_for_dim(2)`.
- kernel_height is 0 or is larger than `zdnn_get_max_for_dim(3)`.
- kernel_width is 0 or is larger than `zdnn_get_max_for_dim(2)`.
- `ZDNN_FUNC_RC_F000` - Invalid `padding_type`
- `ZDNN_FUNC_RC_F001` - `stride_height` = 0 and `stride_width` = 0, but a
kernel parameter is greater than allowed (see `kernel_height` or
`kernel_width` above)
- `ZDNN_FUNC_RC_F002` - `stride_height` > 0 and `stride_width` > 0, but a
kernel parameter is greater than allowed (see `kernel_height` or
`kernel_width` above)
- `ZDNN_FUNC_RC_F003` - `stride_height` > 0 and `stride_width` > 0, but a
stride parameter is greater than allowed (see `stride_height` or
`stride_width` above)
- `ZDNN_FUNC_RC_F004` - `stride_height` > 0 and `stride_width` > 0, but either
input tensor's height or width dimension is greater than 1024.
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow MaxPool](https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/max-pool)
[ONNX MaxPool](https://onnx.ai/onnx/operators/onnx__MaxPool.html#l-onnx-doc-maxpool)
---
### zdnn_conv2d
[Back to Table of Contents](#TOC)
#### Description
Perform 2D convolution over an input tensor in zDNN transformed format.
First, the `input` tensor is convolved with the `kernel` tensor and the `bias`
tensor is added to the results. If `act_func` is not `CONV2D_ACT_NONE`, the
activation function is then applied to the results. If `act_func` is set to
`CONV2D_ACT_RELU` and clipping_value is not `NULL` or `0`, clipping is
performed against the intermediate result where z = min(intermediate_result,
clipping_value). Finally, the results are stored into the provided output zDNN
tensor.
#### Format
```C
zdnn_status zdnn_conv2d(const zdnn_ztensor *input,
const zdnn_ztensor *kernel,
const zdnn_ztensor *bias,
zdnn_pool_padding padding_type,
uint32_t stride_height, uint32_t stride_width,
zdnn_conv2d_act act_func,
const void *clipping_value, zdnn_ztensor *output);
```
#### Parameters
- `zdnn_ztensor *input`
- Tensor with the original values to be convolved with the kernel tensor.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[num_batches, height_in, width_in, channels_in].
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *kernel`
- The kernel tensor to convolute with the input tensor.
- Must be a [ZDNN_HWCK](#common-layouts) tensor with pre_transformed shape
[kernel_height, kernel_width, channels_in, channels_out].
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_ztensor *bias`
- The bias tensor to add to the convoluted results.
- Must be a [ZDNN_1D](#common-layouts) tensor with pre_transformed shape
[channels_out].
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- Must follow [general tensor requirements](#gen-zten-reqs)
- `zdnn_pool_padding padding_type`
- The type of padding to use for the convolution operation.
- Valid values are `SAME_PADDING` or `VALID_PADDING`.
- For information on "same" vs "valid" padding see:
.
- `uint32_t stride_height`
- Number of positions the kernel moves over the input's `dim3` dimension at
each step.
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- `uint32_t stride_width`
- Number of positions the kernel moves over the input's `dim2` dimension at
each step.
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- `zdnn_conv2d_act act_func`
- Activation function to apply to the results.
- `CONV2D_ACT_NONE` or `CONV2D_ACT_RELU`
- `void *clipping_value`
- A pointer to an FP32 value, used to clip input tensor's elements.
- If set to NULL or 0, no clipping will occur.
- Must not be a negative value.
- Value is ignored if `act_func` is not set to `CONV2D_ACT_RELU` (see the call
  sketch after this parameter list).
- `zdnn_ztensor *output`
- The result tensor which will hold the results.
- Must be a [ZDNN_NHWC](#common-layouts) tensor with pre_transformed shape
[num_batches, height_out, width_out, channels_out].
- See [Convolution 2D Requirements](#convolution-2d-requirements) for
requirements.
- Must follow [general tensor requirements](#gen-zten-reqs)
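For illustration, the hedged sketch below shows how the `clipping_value` pointer is typically passed; it assumes `input`, `kernel`, `bias`, and `output` are already-initialized, transformed `zdnn_ztensor`s that satisfy the requirements listed next.

```C
// Hedged sketch: ReLU activation clipped at 6.0, stride 1, SAME padding.
float clip = 6.0f; // FP32 clipping value; pass NULL (or 0) to disable clipping
zdnn_status status =
    zdnn_conv2d(&input, &kernel, &bias, SAME_PADDING, 1, 1,
                CONV2D_ACT_RELU, &clip, &output);
if (status != ZDNN_OK) {
  // handle error (see the Returns section below)
}
```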
#### Convolution 2D Requirements
| strides and padding | input (num_batches, height_in, width_in, channels_in) | kernel (kernel_height, kernel_width, channels_in, channels_out) | bias (channels_out) | output (num_batches, height_out, width_out, channels_out) |
| ----------------------------------------- | ---------------------------------------------------------------------- | --------------------------------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
| both strides > 0 and =< 13, SAME padding  |                                                                      | both kernel_height and kernel_width must be =< 64                |                     | height_out = ceil(height_in/stride_height), width_out = ceil(width_in/stride_width)                                               |
| both strides > 0 and =< 13, VALID padding | height_in must be >= kernel_height, width_in must be >= kernel_width | both kernel_height and kernel_width must be =< 64                |                     | height_out = ceil((height_in - kernel_height + 1)/stride_height), width_out = ceil((width_in - kernel_width + 1)/stride_width)    |
| both strides = 0, VALID padding           | height_in must be = kernel_height, width_in must be = kernel_width   | both kernel_height and kernel_width must be =< 448               |                     | both height_out and width_out must be 1                                                                                           |
#### Returns (see [zDNN Statuses](#common-statuses) for descriptions)
- `ZDNN_OK`
- [warning statuses](#warning-statuses)
- `ZDNN_INVALID_SHAPE`
- Shape of input or output tensor is invalid based on given kernel and stride
parameters
- Other general shape violations (exceeds MDIS, etc.)
- `ZDNN_INVALID_TYPE`
- `ZDNN_INVALID_FORMAT`
- `ZDNN_INVALID_STRIDE_PADDING`
- `ZDNN_INVALID_STRIDES`
- `ZDNN_INVALID_CLIPPING_VALUE`
- [hardware statuses](#hw-statuses)
- `ZDNN_FUNC_RC_F000` - Invalid `padding_type`
- `ZDNN_FUNC_RC_F001` - Invalid `act_func`
- `ZDNN_FUNC_RC_F002` - `stride_height` = 0 and `stride_width` = 0, but either
`kernel_height` or `kernel_width` > 448
- `ZDNN_FUNC_RC_F003` - `stride_height` > 0 and `stride_width` > 0, but either
`kernel_height` or `kernel_width` > 64
- `ZDNN_FUNC_RC_F004` - Either `stride_height` or `stride_width` > 13
#### Since
1.0.0
#### Requirements
This feature requires that:
- `zdnn_is_nnpa_installed()` returns true
- the underlying hardware supports zDNN APIs 1.1.x or later at runtime
See [Validating the environment at runtime](#runtime-val).
#### Framework Examples
[TensorFlow Conv2D](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D)
[ONNX Conv2D](https://onnx.ai/onnx/operators/onnx__Conv.html#l-onnx-doc-conv)
## Convenience Functions
[Back to Table of Contents](#TOC)
- None
---
## Usage Examples
### Example flow of an application calling the zDNN APIs
[Back to Table of Contents](#TOC)
```C
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Create 2 zTensors a and b, and add them together via zdnn_add()
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor_a;
zdnn_ztensor ztensor_b;
zdnn_ztensor ztensor_out;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
// allocate tensor data storage
void *data1 = malloc(num_elements * element_size);
void *data2 = malloc(num_elements * element_size);
void *data_out = malloc(num_elements * element_size);
// read input_data
// check status for zAIU availability, supported ops, etc. here
// status = zdnn_query();
// set input tensor data to 0 to 127 sequentially and repeat
for (uint64_t i = 0; i < num_elements; i++) {
((float *)data1)[i] = (float)(i & 0x7f);
((float *)data2)[i] = (float)(i & 0x7f);
}
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
// generate transformed shape information
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
assert(status == ZDNN_OK);
// initialize zTensors and allocate 4k-aligned storage via helper function
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_a);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_b);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_out);
assert(status == ZDNN_OK);
// transform the feature tensor
status = zdnn_transform_ztensor(&ztensor_a, data1);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor_b, data2);
assert(status == ZDNN_OK);
// perform element-wise add between the two input tensors
status = zdnn_add(&ztensor_a, &ztensor_b, &ztensor_out);
assert(status == ZDNN_OK);
// transform resultant zTensor back to original data format
status = zdnn_transform_origtensor(&ztensor_out, data_out);
assert(status == ZDNN_OK);
for (uint64_t i = 0; i < num_elements; i++) {
printf("out element %" PRIu64 " %f\n", i, ((float *)data_out)[i]);
}
// Free zTensors
status = zdnn_free_ztensor_buffer(&ztensor_a);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&ztensor_b);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&ztensor_out);
assert(status == ZDNN_OK);
free(data1);
free(data2);
free(data_out);
}
```
---
### Example of calling the zdnn_quantized_matmul_op API (normal)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: Quantized Matmul
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* Quantized Matmul:
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (s, m, n)
* weights | ZDNN_3DS | (s, n, p)
* input_biases | ZDNN_2DS | (s, p)
*
* OUTPUTS -------------------------------------------------------------
* output | ZDNN_3DS | (s, m, p)
***********************************************************************/
uint32_t s = 2;
uint32_t m = 3;
uint32_t n = 4;
uint32_t p = 5;
short int8_size = 1; // size of each int8 element in bytes
short float_size = 4; // size of each float element in bytes
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &input_pre_tfrmd_desc,
s, m, n);
status = zdnn_generate_quantized_transformed_desc(
&input_pre_tfrmd_desc, QUANTIZED_INT8, &input_tfrmd_desc);
assert(status == ZDNN_OK);
float input_scale = 1.f;
float input_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc,
input_scale, input_offset,
&input);
assert(status == ZDNN_OK);
uint64_t input_data_size = s * m * n * float_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_quantized_ztensor(&input, false, INT8_MIN, INT8_MAX,
input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create weights zTensor
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, INT8, &weights_pre_tfrmd_desc,
s, n, p);
status = zdnn_generate_quantized_transformed_desc(
&weights_pre_tfrmd_desc, QUANTIZED_WEIGHTS_INT8, &weights_tfrmd_desc);
assert(status == ZDNN_OK);
float weights_scale = 1.f;
float weights_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc,
weights_scale,
weights_offset, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = s * n * p * int8_size;
void *weights_data = malloc(weights_data_size);
status = zdnn_transform_quantized_ztensor(&weights, false, INT8_MIN, INT8_MAX,
weights_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensor
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, FP32, &biases_pre_tfrmd_desc,
s, p);
status = zdnn_generate_quantized_transformed_desc(
&biases_pre_tfrmd_desc, QUANTIZED_INT8, &biases_tfrmd_desc);
assert(status == ZDNN_OK);
float biases_scale = 1.f;
float biases_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc,
biases_scale, biases_offset,
&biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = s * p * float_size;
void *biases_data = malloc(biases_data_size);
status = zdnn_transform_quantized_ztensor(&biases, false, INT8_MIN, INT8_MAX,
biases_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc output_pre_tfrmd_desc, output_tfrmd_desc;
zdnn_ztensor output;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &output_pre_tfrmd_desc,
s, m, p);
status = zdnn_generate_quantized_transformed_desc(
&output_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &output_tfrmd_desc);
assert(status == ZDNN_OK);
float output_scale = 1.f;
float output_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&output_pre_tfrmd_desc,
&output_tfrmd_desc,
output_scale, output_offset,
&output);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
status = zdnn_quantized_matmul_op(&input, &weights, &biases,
MATMUL_OP_ADDITION, INT8_MIN, INT8_MAX,
NULL, &output);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t output_data_size = s * m * p * float_size;
void *output_data = malloc(output_data_size);
status = zdnn_transform_origtensor(&output, output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&output);
assert(status == ZDNN_OK);
free(input_data);
free(weights_data);
free(biases_data);
free(output_data);
}
```
---
### Example of calling the zdnn_quantized_matmul_op API (on-the-fly)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: Quantized Matmul on-the-fly
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* Quantized Matmul on-the-fly:
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (s, m, n)
* weights | ZDNN_3DS | (s, n, p)
* input_biases | ZDNN_2DS | (s, p)
*
* OUTPUTS -------------------------------------------------------------
* output | ZDNN_3DS | (s, m, p)
***********************************************************************/
uint32_t s = 2;
uint32_t m = 3;
uint32_t n = 4;
uint32_t p = 5;
short int8_size = 1; // size of each int8 element in bytes
short float_size = 4; // size of each float element in bytes
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &input_pre_tfrmd_desc,
s, m, n);
status = zdnn_generate_quantized_transformed_desc(
&input_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &input_tfrmd_desc);
assert(status == ZDNN_OK);
float input_scale = 1.f;
float input_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc,
input_scale, input_offset,
&input);
assert(status == ZDNN_OK);
uint64_t input_data_size = s * m * n * float_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create weights zTensor
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, INT8, &weights_pre_tfrmd_desc,
s, n, p);
status = zdnn_generate_quantized_transformed_desc(
&weights_pre_tfrmd_desc, QUANTIZED_WEIGHTS_INT8, &weights_tfrmd_desc);
assert(status == ZDNN_OK);
float weights_scale = 1.f;
float weights_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc,
weights_scale,
weights_offset, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = s * n * p * int8_size;
void *weights_data = malloc(weights_data_size);
status = zdnn_transform_quantized_ztensor(&weights, false, INT8_MIN, INT8_MAX,
weights_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensor
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, FP32, &biases_pre_tfrmd_desc,
s, p);
status = zdnn_generate_quantized_transformed_desc(
&biases_pre_tfrmd_desc, QUANTIZED_INT8, &biases_tfrmd_desc);
assert(status == ZDNN_OK);
float biases_scale = 1.f;
float biases_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc,
biases_scale, biases_offset,
&biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = s * p * float_size;
void *biases_data = malloc(biases_data_size);
status = zdnn_transform_quantized_ztensor(&biases, false, INT8_MIN, INT8_MAX,
biases_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc output_pre_tfrmd_desc, output_tfrmd_desc;
zdnn_ztensor output;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &output_pre_tfrmd_desc,
s, m, p);
status = zdnn_generate_quantized_transformed_desc(
&output_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &output_tfrmd_desc);
assert(status == ZDNN_OK);
float output_scale = 1.f;
float output_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&output_pre_tfrmd_desc,
&output_tfrmd_desc,
output_scale, output_offset,
&output);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
status = zdnn_quantized_matmul_op(&input, &weights, &biases,
MATMUL_OP_ADDITION, INT8_MIN, INT8_MAX,
NULL, &output);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t output_data_size = s * m * p * float_size;
void *output_data = malloc(output_data_size);
status = zdnn_transform_origtensor(&output, output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&output);
assert(status == ZDNN_OK);
free(input_data);
free(weights_data);
free(biases_data);
free(output_data);
}
```
---
### Example of calling the zdnn_quantized_matmul with pre_computed=true API (normal)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: Quantized Matmul Pre-Computed
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* Quantized Matmul Pre-Computed:
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (s, m, n)
* weights | ZDNN_3DS | (s, n, p)
* input_biases | ZDNN_2DS | (s, p)
*
* OUTPUTS -------------------------------------------------------------
* output | ZDNN_3DS | (s, m, p)
***********************************************************************/
uint32_t s = 2;
uint32_t m = 3;
uint32_t n = 4;
uint32_t p = 5;
short int8_size = 1; // size of each int8 element in bytes
short float_size = 4; // size of each float element in bytes
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &input_pre_tfrmd_desc,
s, m, n);
status = zdnn_generate_quantized_transformed_desc(
&input_pre_tfrmd_desc, QUANTIZED_INT8, &input_tfrmd_desc);
assert(status == ZDNN_OK);
float input_scale = 1.f;
float input_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc,
input_scale, input_offset,
&input);
assert(status == ZDNN_OK);
uint64_t input_data_size = s * m * n * float_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_quantized_ztensor(&input, false, INT8_MIN, INT8_MAX,
input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create weights zTensor
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, INT8, &weights_pre_tfrmd_desc,
s, n, p);
status = zdnn_generate_quantized_transformed_desc(
&weights_pre_tfrmd_desc, QUANTIZED_WEIGHTS_INT8, &weights_tfrmd_desc);
assert(status == ZDNN_OK);
float weights_scale = 1.f;
float weights_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc,
weights_scale,
weights_offset, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = s * n * p * int8_size;
void *weights_data = malloc(weights_data_size);
status = zdnn_transform_quantized_ztensor(&weights, false, INT8_MIN, INT8_MAX,
weights_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create pre-computed biases zTensor
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, FP32, &biases_pre_tfrmd_desc,
s, p);
status = zdnn_generate_quantized_transformed_desc(
&biases_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &biases_tfrmd_desc);
assert(status == ZDNN_OK);
float biases_scale = 1.f;
float biases_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc,
biases_scale, biases_offset,
&biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = s * p * float_size;
void *biases_data = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc output_pre_tfrmd_desc, output_tfrmd_desc;
zdnn_ztensor output;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &output_pre_tfrmd_desc,
s, m, p);
status = zdnn_generate_quantized_transformed_desc(
&output_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &output_tfrmd_desc);
assert(status == ZDNN_OK);
float output_scale = 1.f;
float output_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&output_pre_tfrmd_desc,
&output_tfrmd_desc,
output_scale, output_offset,
&output);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
status = zdnn_quantized_matmul_op(&input, &weights, &biases,
MATMUL_OP_ADDITION, INT8_MIN,
INT8_MAX, false, true, NULL, &output);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t output_data_size = s * m * p * float_size;
void *output_data = malloc(output_data_size);
status = zdnn_transform_origtensor(&output, output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&output);
assert(status == ZDNN_OK);
free(input_data);
free(weights_data);
free(biases_data);
free(output_data);
}
```
---
### Example of calling the zdnn_quantized_matmul_op with pre_computed=true API (on-the-fly)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: Quantized Matmul Pre-Computed on-the-fly
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* Quantized Matmul Pre-Computed on-the-fly:
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (s, m, n)
* weights | ZDNN_3DS | (s, n, p)
* input_biases | ZDNN_2DS | (s, p)
*
* OUTPUTS -------------------------------------------------------------
* output | ZDNN_3DS | (s, m, p)
***********************************************************************/
uint32_t s = 2;
uint32_t m = 3;
uint32_t n = 4;
uint32_t p = 5;
short int8_size = 1; // size of each int8 element in bytes
short float_size = 4; // size of each float element in bytes
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &input_pre_tfrmd_desc,
s, m, n);
status = zdnn_generate_quantized_transformed_desc(
&input_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &input_tfrmd_desc);
assert(status == ZDNN_OK);
float input_scale = 1.f;
float input_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc,
input_scale, input_offset,
&input);
assert(status == ZDNN_OK);
uint64_t input_data_size = s * m * n * float_size;
void *input_data = malloc(input_data_size);
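// a real application would populate input_data with s * m * n FP32 values here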
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create weights zTensor
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, INT8, &weights_pre_tfrmd_desc,
s, n, p);
status = zdnn_generate_quantized_transformed_desc(
&weights_pre_tfrmd_desc, QUANTIZED_WEIGHTS_INT8, &weights_tfrmd_desc);
assert(status == ZDNN_OK);
float weights_scale = 1.f;
float weights_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc,
weights_scale,
weights_offset, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = s * n * p * int8_size;
void *weights_data = malloc(weights_data_size);
status = zdnn_transform_quantized_ztensor(&weights, false, INT8_MIN, INT8_MAX,
weights_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create pre-computed biases zTensor
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, FP32, &biases_pre_tfrmd_desc,
s, p);
status = zdnn_generate_quantized_transformed_desc(
&biases_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &biases_tfrmd_desc);
assert(status == ZDNN_OK);
float biases_scale = 1.f;
float biases_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc,
biases_scale, biases_offset,
&biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = s * p * float_size;
void *biases_data = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc output_pre_tfrmd_desc, output_tfrmd_desc;
zdnn_ztensor output;
zdnn_init_pre_transformed_desc(ZDNN_3DS, FP32, &output_pre_tfrmd_desc,
s, m, p);
status = zdnn_generate_quantized_transformed_desc(
&output_pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &output_tfrmd_desc);
assert(status == ZDNN_OK);
float output_scale = 1.f;
float output_offset = 0.f;
status = zdnn_init_quantized_ztensor_with_malloc(&output_pre_tfrmd_desc,
&output_tfrmd_desc,
output_scale, output_offset,
&output);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
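// INT8_MIN / INT8_MAX as the clip values span the full int8 range,
// so no additional clipping is requested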
status = zdnn_quantized_matmul_op(&input, &weights, &biases,
MATMUL_OP_ADDITION, INT8_MIN,
INT8_MAX, false, true, NULL, &output);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t output_data_size = s * m * p * float_size;
void *output_data = malloc(output_data_size);
status = zdnn_transform_origtensor(&output, output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&output);
assert(status == ZDNN_OK);
free(input_data);
free(weights_data);
free(biases_data);
free(output_data);
}
```
---
### Example of an application calling the zdnn_lstm API (forward)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: LSTM
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* LSTM (FWD/BWD):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (1, num_batches, num_hidden)
* c0 | ZDNN_3DS | (1, num_batches, num_hidden)
* weights | ZDNN_3DS | (1, num_features, num_hidden)
* biases | ZDNN_2DS | (1, num_hidden)
* hidden_weights | ZDNN_3DS | (1, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (1, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 1, num_batches, num_hidden)
* | | or (1, 1, num_batches, num_hidden)
* cf_output | ZDNN_4DS | (1, 1, num_batches, num_hidden)
***********************************************************************/
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = FWD;
uint8_t num_dirs = 1;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
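// a real application would populate input_data with FP32 values here;
// zdnn_transform_ztensor converts them into the zAIU internal format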
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
// get only the last timestep, thus hn and cf can share descriptor
zdnn_tensor_desc hncf_pre_tfrmd_desc, hncf_tfrmd_desc;
zdnn_ztensor hn_output_ztensor, cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hncf_pre_tfrmd_desc, 1, 1,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_lstm(&input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hncf_data_size = num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hncf_data_size);
void *cf_output_data = malloc(hncf_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_transform_origtensor(&cf_output_ztensor, cf_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
free(hn_output_data);
free(cf_output_data);
}
```
---
### Example of an application calling the zdnn_lstm API (bi-directional)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: LSTM BI-DIR
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* LSTM (BI-DIR):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (2, num_batches, num_hidden)
* c0 | ZDNN_3DS | (2, num_batches, num_hidden)
* weights | ZDNN_3DS | (2, num_features, num_hidden)
* biases | ZDNN_2DS | (2, num_hidden)
* hidden_weights | ZDNN_3DS | (2, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (2, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 2, num_batches, num_hidden)
* | | or (1, 2, num_batches, num_hidden)
* cf_output | ZDNN_4DS | (1, 2, num_batches, num_hidden)
***********************************************************************/
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = BIDIR;
uint8_t num_dirs = 2;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc hn_pre_tfrmd_desc, hn_tfrmd_desc, cf_pre_tfrmd_desc,
cf_tfrmd_desc;
zdnn_ztensor hn_output_ztensor, cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc,
num_timesteps, 2, num_batches, num_hidden);
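// dim4 = num_timesteps returns hn results for all timesteps;
// use 1 instead to return only the final timestep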
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc);
assert(status == ZDNN_OK);
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &cf_pre_tfrmd_desc, 1, 2,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_lstm(&input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hn_data_size =
num_timesteps * 2 * num_batches * num_hidden * element_size;
uint64_t cf_data_size = 2 * num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hn_data_size);
void *cf_output_data = malloc(cf_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_transform_origtensor(&cf_output_ztensor, cf_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
free(hn_output_data);
free(cf_output_data);
}
```
---
### Example of an application calling the zdnn_lstm API (multi-layer bi-directional)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
void do_bidir_layer(zdnn_ztensor *input, uint32_t num_hidden,
zdnn_ztensor *hn_output, bool is_prev_layer_bidir) {
zdnn_status status;
uint32_t num_batches = input->pre_transformed_desc->dim2;
// if input is bidir output from previous layer then number of features for
// this layer is 2x the hidden-state size (dim1) of the previous layer
uint32_t num_features =
input->pre_transformed_desc->dim1 * (is_prev_layer_bidir ? 2 : 1);
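// e.g. layer 2 in this sample: the previous bidir layer uses num_hidden = 5,
// so dim1 is 5 and num_features becomes 2 * 5 = 10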
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = BIDIR;
uint8_t num_dirs = 2;
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
// if using previous layer bidir output as input then the number of features
// for this layer is already doubled (see num_features above)
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_WEIGHTS |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_BIASES |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create cf output zTensor
***********************************************************************/
zdnn_tensor_desc cf_pre_tfrmd_desc, cf_tfrmd_desc;
zdnn_ztensor cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &cf_pre_tfrmd_desc, 1, 2,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status =
zdnn_lstm(input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, hn_output, &cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Cleanup and Return
***********************************************************************/
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
}
// Sample: LSTM multi-layer BIDIR
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
uint32_t num_hidden[2] = {5, 4};
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create 2 hn output zTensors
***********************************************************************/
zdnn_tensor_desc hn_pre_tfrmd_desc[2], hn_tfrmd_desc[2];
zdnn_ztensor hn_output[2];
for (int i = 0; i < 2; i++) {
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc[i],
num_timesteps, 2, num_batches,
num_hidden[i]);
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc[i],
&hn_tfrmd_desc[i]);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc[i],
&hn_tfrmd_desc[i], &hn_output[i]);
assert(status == ZDNN_OK);
}
/***********************************************************************
* Do the layers
***********************************************************************/
// call the first layer with input, previous layer bidir = false, output goes
// to hn_output[0]
do_bidir_layer(&input, num_hidden[0], &hn_output[0], false);
// call the second layer with hn_output[0] from layer 1, previous layer bidir
// = true, output goes to hn_output[1]
do_bidir_layer(&hn_output[0], num_hidden[1], &hn_output[1], true);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
void *hn_output_data[2];
for (int i = 0; i < 2; i++) {
uint64_t hn_output_data_size = (uint64_t)num_timesteps * num_batches *
num_hidden[i] * 2 * element_size;
hn_output_data[i] = malloc(hn_output_data_size);
status = zdnn_transform_origtensor(&hn_output[i], hn_output_data[i]);
assert(status == ZDNN_OK);
}
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output[0]);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output[1]);
assert(status == ZDNN_OK);
free(input_data);
free(hn_output_data[0]);
free(hn_output_data[1]);
}
```
---
### Example of an application calling the zdnn_gru API (forward)
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: GRU
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* GRU (FWD/BWD):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (1, num_batches, num_hidden)
* weights | ZDNN_3DS | (1, num_features, num_hidden)
* input_biases | ZDNN_2DS | (1, num_hidden)
* hidden_weights | ZDNN_3DS | (1, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (1, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 1, num_batches, num_hidden)
* | | or (1, 1, num_batches, num_hidden)
***********************************************************************/
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = FWD;
uint8_t num_dirs = 1;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden zTensor
***********************************************************************/
zdnn_tensor_desc h0_pre_tfrmd_desc, h0_tfrmd_desc;
zdnn_ztensor h0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&h0_pre_tfrmd_desc, &h0_tfrmd_desc);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&h0_pre_tfrmd_desc, &h0_tfrmd_desc, &h0);
assert(status == ZDNN_OK);
uint64_t h0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_z = malloc(weights_data_size);
void *weights_data_r = malloc(weights_data_size);
void *weights_data_h = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_z, weights_data_r,
weights_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_GRU | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_z = malloc(biases_data_size);
void *biases_data_r = malloc(biases_data_size);
void *biases_data_h = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_z, biases_data_r,
biases_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_GRU | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_z = malloc(hidden_weights_data_size);
void *hidden_weights_data_r = malloc(hidden_weights_data_size);
void *hidden_weights_data_h = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_z,
hidden_weights_data_r, hidden_weights_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_GRU | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_z = malloc(hidden_biases_data_size);
void *hidden_biases_data_r = malloc(hidden_biases_data_size);
void *hidden_biases_data_h = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_z,
hidden_biases_data_r, hidden_biases_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
// get only the last timestep
zdnn_tensor_desc hn_pre_tfrmd_desc, hn_tfrmd_desc;
zdnn_ztensor hn_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc, 1, 1,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_gru(&input, &h0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hn_data_size = num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hn_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(weights_data_z);
free(weights_data_r);
free(weights_data_h);
free(hidden_weights_data_z);
free(hidden_weights_data_r);
free(hidden_weights_data_h);
free(biases_data_z);
free(biases_data_r);
free(biases_data_h);
free(hidden_biases_data_z);
free(hidden_biases_data_r);
free(hidden_biases_data_h);
free(hn_output_data);
}
```
---
### Example of an application creating a quantized ztensor
[Back to Table of Contents](#TOC)
```C
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Create a quantized zTensor
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
// allocate tensor data storage
void *data1 = malloc(num_elements * element_size);
// read input_data
// check status for zAIU availability, supported ops, etc. here
// status = zdnn_query();
// set input tensor data to 0 to 127 sequentially and repeat
for (uint64_t i = 0; i < num_elements; i++) {
((float *)data1)[i] = (float)(i & 0x7f);
}
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
float scale = 3;
float offset = 2;
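// scale and offset describe the affine quantization of the data
// (conventionally: real value ≈ scale * (quantized value - offset))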
// generate transformed shape information
status = zdnn_generate_quantized_transformed_desc(
&pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &tfrmd_desc);
assert(status == ZDNN_OK);
// initialize zTensors and allocate 4k-aligned storage via helper function
status = zdnn_init_quantized_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc,
scale, offset, &ztensor);
assert(status == ZDNN_OK);
// transform the feature tensor
status = zdnn_transform_ztensor(&ztensor, data1);
assert(status == ZDNN_OK);
// Free zTensors
status = zdnn_free_ztensor_buffer(&ztensor);
assert(status == ZDNN_OK);
free(data1);
}
```
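The samples above use `assert(status == ZDNN_OK)` purely for brevity. In a real
application each returned `zdnn_status` would normally be checked and handled
explicitly; the helper below is a minimal sketch of that pattern (illustrative
only, `check_status` is not part of the zDNN API).
```C
// Minimal status-checking helper that can replace the asserts in the samples
// above. It relies only on zdnn_status and ZDNN_OK from zdnn.h.
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
static void check_status(zdnn_status status, const char *what) {
  if (status != ZDNN_OK) {
    fprintf(stderr, "%s failed with zdnn_status 0x%08x\n", what,
            (unsigned int)status);
    exit(EXIT_FAILURE);
  }
}
// Usage, e.g.:
//   check_status(zdnn_transform_ztensor(&input, input_data),
//                "zdnn_transform_ztensor");
```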
zDNN-1.1.2/config.h.in 0000664 0000000 0000000 00000001245 15000221702 0014336 0 ustar 00root root 0000000 0000000 /* config.h.in. Generated from configure.ac by autoheader. */
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Enable additional checking, error reporting, disable compiler
optimizations, and add debug information */
#undef ZDNN_CONFIG_DEBUG
zDNN-1.1.2/config.make.in 0000664 0000000 0000000 00000003452 15000221702 0015026 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
srcdir := @srcdir@
objdir := @objdir@
# The following variables influence the paths used by make install.
prefix := @prefix@
exec_prefix := @exec_prefix@
includedir := @includedir@
libdir := @libdir@
CC := @CC@
CXX := @CXX@
LD := @LD@
AR := @AR@
ARFLAGS := @ARFLAGS@
CFLAGS_INIT := @CFLAGS_INIT@ "@CFLAGS_QUOTE_INIT@"
CFLAGS := @CFLAGS@ "@CFLAGS_QUOTE@"
CFLAGS_DEBUG := @CFLAGS_DEBUG@
CFLAGS_SHARED := @CFLAGS_SHARED@
CFLAGS_NOSEARCH := @CFLAGS_NOSEARCH@
CXXFLAGS_ASM := @CXXFLAGS_ASM@
CFLAGS_ASM := @CFLAGS_ASM@
CXXFLAGS := @CXXFLAGS@
CPP_SYMCHECK_FLAGS := @CPP_SYMCHECK_FLAGS@
SODIR := @SODIR@
LIBNAME := @LIBNAME@
LIBSONAME := @LIBSONAME@
LIBNAME_PRIVATE=@LIBNAME_PRIVATE@
LIBSONAME_PRIVATE=@LIBSONAME_PRIVATE@
LDFLAGS := @LDFLAGS@
LDFLAGS_SHARED := @LDFLAGS_SHARED@
LDFLAGS_SHARED_EXPORTALL := @LDFLAGS_SHARED_EXPORTALL@
LDFLAGS_TEST := @LDFLAGS_TEST@
LD_PATH_VAR := @LD_PATH_VAR@
ECHOFLAGS := "@ECHOFLAGS@"
AWK := @AWK@
READELF := @READELF@
INSTALL := install -c
INSTALL_DATA := $(INSTALL) -m 644
ZDNN_TMAKE_FILES := @ZDNN_TMAKE_FILES@
ZDNN_MAKE_TARGETS := @ZDNN_MAKE_TARGETS@
ZDNN_INSTALL_TARGETS := @ZDNN_INSTALL_TARGETS@
# Build options
debug := @zdnn_config_debug@
no_rpath := @zdnn_config_no_rpath@
zDNN-1.1.2/config.zdnn 0000664 0000000 0000000 00000007606 15000221702 0014462 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021, 2024
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is invoked by configure to set the initial values of
# certain platform-dependent variables.
target="$(uname -m)-$(uname)"
case "${target}" in
s390x-Linux)
CC=${CC:-gcc}
CXX=${CXX:-g++}
LD=${LD:-g++}
AR=${AR:-ar}
ARFLAGS="${ARFLAGS:--rc}"
CFLAGS_INIT="-O3 -mzvector -Wall -std=gnu99 -fstack-protector-all ${CFLAGS_INIT:-}"
CFLAGS_QUOTE_INIT="-Wall" # Not needed on Linux. Just repeat an option to prevent it from being empty.
CFLAGS_OPT_EXPENSIVE="-funroll-loops"
CFLAGS="-O3 -march=z14 -mzvector -Wall -std=gnu99 -fstack-protector-all ${CFLAGS_OPT_EXPENSIVE} ${CFLAGS:-}"
CFLAGS_QUOTE="-Wall"
CFLAGS_DEBUG="-O0 -g3 ${CFLAGS_DEBUG:-}"
CFLAGS_SHARED="-fPIC ${CFLAGS_SHARED:-}"
CFLAGS_ASM="-Wa,-adhln -fno-asynchronous-unwind-tables ${CFLAGS_ASM:-}"
CFLAGS_NOSEARCH=""
CXXFLAGS="-O3 -march=z14 -Wall ${CXXFLAGS:-}"
CPP_SYMCHECK_FLAGS="-E -o zdnn.i"
SODIR="${SODIR:-lib}"
LIBNAME="${LIBNAME:-libzdnn}"
LIBSONAME="${LIBSONAME:-${LIBNAME}.so.0}"
LIBNAME_PRIVATE="${LIBNAME_PRIVATE:-${LIBNAME}-private}"
LIBSONAME_PRIVATE="${LIBSONAME_PRIVATE:-${LIBNAME_PRIVATE}.so.0}"
LDFLAGS="${LDFLAGS:-}"
LDFLAGS_SHARED="-shared -Wl,-Bsymbolic-functions -Wl,-soname,${LIBSONAME} -Wl,--version-script=zdnn.map -lm ${LDFLAGS_SHARED:-} ${LDFLAGS:-}"
LDFLAGS_SHARED_EXPORTALL="-shared -Wl,-Bsymbolic-functions -Wl,-soname,${LIBSONAME_PRIVATE} -Wl,--version-script=zdnn_exportall.map -lm ${LDFLAGS_SHARED_EXPORTALL:-} ${LDFLAGS:-}"
LDFLAGS_TEST="-L ../zdnn/${SODIR} -l${LIBNAME_PRIVATE#lib} ../zdnn/${SODIR}/${LIBNAME_PRIVATE}.so -lm ${LDFLAGS_TEST:-} ${LDFLAGS:-}"
LD_PATH_VAR="${LD_PATH_VAR:-LD_LIBRARY_PATH}"
ECHOFLAGS="-e"
ZDNN_TMAKE_FILES="t-static t-libsoname t-gccexpo t-symcheck t-listings"
ZDNN_MAKE_TARGETS="${SODIR}/${LIBNAME}.a libsoname symcheck"
ZDNN_INSTALL_TARGETS="install_libsoname install_static"
;;
*-OS/390)
CC=${CC:-xlc}
CXX=${CXX:-xlC}
LD=${LD:-xlC}
AR=${AR:-ar}
ARFLAGS="${ARFLAGS:--rc}"
CXXFLAGS="-I /usr/include -I /usr/include/zos/ -Wc,ASM,LP64,INLINE,VECTOR ${CXXFLAGS:-}"
CFLAGS_INIT="${CXXFLAGS} -qlanglvl=extc99 ${CFLAGS_INIT:-}"
CFLAGS_QUOTE_INIT="-Wc,SUPPRESS(CCN4108),STACKPROTECT(ALL)" # The options with () require an extra pair of quotes in config.make.in
CFLAGS_OPT_EXPENSIVE="-qhot"
CFLAGS="${CXXFLAGS} -qlanglvl=extc99 ${CFLAGS_OPT_EXPENSIVE} ${CFLAGS:-}"
CFLAGS_QUOTE="-Wc,ARCH(13),SUPPRESS(CCN4108),STACKPROTECT(ALL)"
CFLAGS_DEBUG="-g3 ${CFLAGS_DEBUG:-}"
CFLAGS_SHARED="-Wc,DLL ${CFLAGS_SHARED:-}"
CXXFLAGS_ASM='-Wc,"SOURCE,LIST"'" ${CXXFLAGS_ASM:-}"
CFLAGS_ASM="${CXXFLAGS_ASM} -Wc,AGGREGATE ${CFLAGS_ASM:-}"
CFLAGS_NOSEARCH="-qnosearch"
CPP_SYMCHECK_FLAGS="-P"
SODIR="${SODIR:-lib}"
LIBNAME="${LIBNAME:-libzdnn}"
LIBSONAME="${LIBSONAME:-}"
LIBNAME_PRIVATE="${LIBNAME_PRIVATE:-${LIBNAME}-private}"
LIBSONAME_PRIVATE="${LIBSONAME_PRIVATE:-}"
LDFLAGS="${LDFLAGS:-}"
LDFLAGS_SHARED="-Wl,DLL -Wc,LP64 ${LDFLAGS_SHARED:-} ${LDFLAGS:-}"
LDFLAGS_SHARED_EXPORTALL="${LDFLAGS_SHARED_EXPORTALL:-} ${LDFLAGS:-}"
LDFLAGS_TEST="../zdnn/${SODIR}/${LIBNAME_PRIVATE}.x -lm ${LDFLAGS_TEST:-} ${LDFLAGS:-}"
LD_PATH_VAR="${LD_PATH_VAR:-LIBPATH}"
ECHOFLAGS=""
ZDNN_TMAKE_FILES="t-xlcexpo t-symcheck t-listings"
ZDNN_MAKE_TARGETS="${SODIR}/${LIBNAME}.x symcheck"
ZDNN_INSTALL_TARGETS=""
;;
*)
echo "Platform ${target} is not supported"
exit 1
;;
esac
zDNN-1.1.2/configure.ac 0000664 0000000 0000000 00000006131 15000221702 0014600 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021, 2024
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
AC_INIT([libzdnn], [1.1.2])
objdir=`pwd -P`
if test ! "`cd $srcdir; pwd -P`" = "$objdir"; then
AC_MSG_ERROR([Configuring is only allowed in the source directory! If needed, please propose a patch!])
fi
. ./config.zdnn || exit 1
# Currently objdir is equal to srcdir, but it is used in "make install"
# in order to distinguish sources and binaries.
AC_SUBST(objdir)
# Check for requirements
AC_PROG_CC
AC_PROG_CXX
AC_CONFIG_HEADERS([config.h])
AC_ARG_VAR(CC)
AC_ARG_VAR(CXX)
AC_ARG_VAR(LD)
AC_ARG_VAR(AR)
AC_ARG_VAR(ARFLAGS)
AC_ARG_VAR(CFLAGS)
AC_ARG_VAR(CFLAGS_INIT)
AC_ARG_VAR(CFLAGS_DEBUG)
AC_ARG_VAR(CFLAGS_SHARED)
AC_ARG_VAR(CFLAGS_ASM)
AC_ARG_VAR(CFLAGS_NOSEARCH)
AC_ARG_VAR(CXXFLAGS)
AC_ARG_VAR(CXXFLAGS_ASM)
AC_ARG_VAR(CPP_SYMCHECK_FLAGS)
AC_ARG_VAR(SODIR)
AC_ARG_VAR(LIBNAME)
AC_ARG_VAR(LIBSONAME)
AC_ARG_VAR(LIBNAME_PRIVATE)
AC_ARG_VAR(LIBSONAME_PRIVATE)
AC_ARG_VAR(LDFLAGS)
AC_ARG_VAR(LDFLAGS_SHARED)
AC_ARG_VAR(LDFLAGS_SHARED_EXPORTALL)
AC_ARG_VAR(LDFLAGS_TEST)
AC_ARG_VAR(ECHOFLAGS)
AC_ARG_VAR(AWK)
AC_ARG_VAR(READELF)
AC_SUBST(CFLAGS_QUOTE)
AC_SUBST(CFLAGS_QUOTE_INIT)
AC_SUBST(zdnn_config_debug, 0)
AC_SUBST(zdnn_config_no_rpath, 0)
AC_SUBST(ZDNN_TMAKE_FILES)
AC_SUBST(ZDNN_MAKE_TARGETS)
AC_SUBST(ZDNN_INSTALL_TARGETS)
AC_SUBST(LD_PATH_VAR)
AC_CHECK_PROG(AWK, awk, awk, false)
AS_IF([test x"$AWK" = x"false"], [AC_MSG_ERROR([Please install awk before configuring.])])
AC_CHECK_TOOL(READELF, readelf, "")
AS_IF([test x"$READELF" = x"false"], [AC_MSG_WARN([readelf is required for checking the exported symbols. Check will be skipped.])])
AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Enable additional checking, error reporting, disable compiler optimizations, and add debug information]))
AS_IF([test "x$enable_debug" = "xyes"], [
zdnn_config_debug=1
CFLAGS="${CFLAGS} ${CFLAGS_DEBUG}"
CFLAGS_INIT="${CFLAGS_INIT} ${CFLAGS_DEBUG}"
CXXFLAGS="${CXXFLAGS} ${CFLAGS_DEBUG}"
AC_DEFINE(ZDNN_CONFIG_DEBUG, 1, [Enable additional checking, error reporting, disable compiler optimizations, and add debug information])
])
AC_ARG_ENABLE([listings], AS_HELP_STRING([--enable-listings], [Make 'make all' generate assembler listings]))
AS_IF([test "x$enable_listings" = "xyes"], [
ZDNN_MAKE_TARGETS="${ZDNN_MAKE_TARGETS} listings"
])
AC_ARG_ENABLE([test-rpath], AS_HELP_STRING([--disable-test-rpath], [Don't set the rpath in the test binaries to keep them relocatable]))
AS_IF([test "x$enable_test_rpath" = "xno"], [zdnn_config_no_rpath=1])
# Generate output
AC_CONFIG_FILES([config.make])
AC_OUTPUT
zDNN-1.1.2/docs/ 0000775 0000000 0000000 00000000000 15000221702 0013241 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/docs/CODEOWNERS 0000664 0000000 0000000 00000000206 15000221702 0014632 0 ustar 00root root 0000000 0000000 # Trigger following contributors/global code owners for each code change.
* nmarion@us.ibm.com krebbel@linux.ibm.com sbj@us.ibm.com
zDNN-1.1.2/docs/pull_request_template.md 0000664 0000000 0000000 00000000666 15000221702 0020212 0 ustar 00root root 0000000 0000000 ### Description
#### Features
#### Fixes
zDNN-1.1.2/licenses/ 0000775 0000000 0000000 00000000000 15000221702 0014116 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/licenses/LICENSE-Unity.txt 0000664 0000000 0000000 00000002133 15000221702 0017046 0 ustar 00root root 0000000 0000000 The MIT License (MIT)
Copyright (c) 2007-24 Mike Karlesky, Mark VanderVoord, Greg Williams
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. zDNN-1.1.2/samples/ 0000775 0000000 0000000 00000000000 15000221702 0013755 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/samples/README.md 0000664 0000000 0000000 00000000615 15000221702 0015236 0 ustar 00root root 0000000 0000000 # Samples
## Compile
Assume current directory is `/samples`
z/OS:
```
xlc -g3 -qlanglvl=extc99 -Wc,LP64 -I ../zdnn -o simple_add simple_add.c ../zdnn/lib/libzdnn.x
```
Linux:
```
gcc -g3 -Wall -fmessage-length=0 -std=c99 -I ../zdnn -o simple_add simple_add.c ../zdnn/lib/libzdnn.so
```
### NOTE: Add `-D STATIC_LIB` to the gcc invocation if you're compiling against the statically-linked library
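To run a sample on Linux against the shared library, the runtime loader must be able to find `libzdnn.so`. A minimal example, assuming the build layout above (paths are illustrative):
```
LD_LIBRARY_PATH=../zdnn/lib ./simple_add
```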
zDNN-1.1.2/samples/descriptor_share.c 0000664 0000000 0000000 00000004616 15000221702 0017470 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Descriptor sharing among zTensors
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc *pre_tfrmd_desc, *tfrmd_desc;
zdnn_ztensor ztensor1, ztensor2;
zdnn_status status;
uint32_t dim2 = 32, dim1 = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim2 * dim1;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data1 = malloc(num_elements * element_size);
void *data2 = malloc(num_elements * element_size);
pre_tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, pre_tfrmd_desc, dim2, dim1);
status = zdnn_generate_transformed_desc_concatenated(pre_tfrmd_desc,
CONCAT_LSTM, tfrmd_desc);
assert(status == ZDNN_OK);
ztensor1.pre_transformed_desc = pre_tfrmd_desc;
ztensor1.transformed_desc = tfrmd_desc;
ztensor2.pre_transformed_desc = pre_tfrmd_desc;
ztensor2.transformed_desc = tfrmd_desc;
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, &ztensor1);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, &ztensor2);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor1, data1, data1, data1, data1);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor2, data2, data2, data2, data2);
assert(status == ZDNN_OK);
free(pre_tfrmd_desc);
free(tfrmd_desc);
free(data2);
free(data1);
}
zDNN-1.1.2/samples/rnn_gru_fwd.c 0000664 0000000 0000000 00000025662 15000221702 0016446 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: GRU
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* GRU (FWD/BWD):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (1, num_batches, num_hidden)
* weights | ZDNN_3DS | (1, num_features, num_hidden)
* input_biases | ZDNN_2DS | (1, num_hidden)
* hidden_weights | ZDNN_3DS | (1, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (1, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 1, num_batches, num_hidden)
* | | or (1, 1, num_batches, num_hidden)
***********************************************************************/
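// With the concrete values used below (num_timesteps = 5, num_batches = 3,
// num_features = 32, num_hidden = 5), input is (5, 3, 32) and hn_output is
// requested for the last timestep only, i.e. (1, 1, 3, 5).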
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = FWD;
uint8_t num_dirs = 1;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden zTensor
***********************************************************************/
zdnn_tensor_desc h0_pre_tfrmd_desc, h0_tfrmd_desc;
zdnn_ztensor h0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&h0_pre_tfrmd_desc, &h0_tfrmd_desc);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&h0_pre_tfrmd_desc, &h0_tfrmd_desc, &h0);
assert(status == ZDNN_OK);
uint64_t h0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_z = malloc(weights_data_size);
void *weights_data_r = malloc(weights_data_size);
void *weights_data_h = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_z, weights_data_r,
weights_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_GRU | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_z = malloc(biases_data_size);
void *biases_data_r = malloc(biases_data_size);
void *biases_data_h = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_z, biases_data_r,
biases_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_GRU | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_z = malloc(hidden_weights_data_size);
void *hidden_weights_data_r = malloc(hidden_weights_data_size);
void *hidden_weights_data_h = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_z,
hidden_weights_data_r, hidden_weights_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_GRU | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_z = malloc(hidden_biases_data_size);
void *hidden_biases_data_r = malloc(hidden_biases_data_size);
void *hidden_biases_data_h = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_z,
hidden_biases_data_r, hidden_biases_data_h);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
// get only the last timestep
zdnn_tensor_desc hn_pre_tfrmd_desc, hn_tfrmd_desc;
zdnn_ztensor hn_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc, 1, 1,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_gru(&input, &h0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hn_data_size = num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hn_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(weights_data_z);
free(weights_data_r);
free(weights_data_h);
free(hidden_weights_data_z);
free(hidden_weights_data_r);
free(hidden_weights_data_h);
free(biases_data_z);
free(biases_data_r);
free(biases_data_h);
free(hidden_biases_data_z);
free(hidden_biases_data_r);
free(hidden_biases_data_h);
free(hn_output_data);
}
zDNN-1.1.2/samples/rnn_lstm_bidir.c 0000664 0000000 0000000 00000031525 15000221702 0017134 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: LSTM BI-DIR
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* LSTM (BI-DIR):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (2, num_batches, num_hidden)
* c0 | ZDNN_3DS | (2, num_batches, num_hidden)
* weights | ZDNN_3DS | (2, num_features, num_hidden)
* biases | ZDNN_2DS | (2, num_hidden)
* hidden_weights | ZDNN_3DS | (2, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (2, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 2, num_batches, num_hidden)
* | | or (1, 2, num_batches, num_hidden)
* cf_output | ZDNN_4DS | (1, 2, num_batches, num_hidden)
***********************************************************************/
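// With the concrete values used below (num_timesteps = 5, num_batches = 3,
// num_features = 32, num_hidden = 5), hn_output covers every timestep and
// both directions, i.e. (5, 2, 3, 5), while cf_output is (1, 2, 3, 5).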
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = BIDIR;
uint8_t num_dirs = 2;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
zdnn_tensor_desc hn_pre_tfrmd_desc, hn_tfrmd_desc, cf_pre_tfrmd_desc,
cf_tfrmd_desc;
zdnn_ztensor hn_output_ztensor, cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc,
num_timesteps, 2, num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc);
assert(status == ZDNN_OK);
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &cf_pre_tfrmd_desc, 1, 2,
                               num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc, &hn_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_lstm(&input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hn_data_size =
num_timesteps * 2 * num_batches * num_hidden * element_size;
uint64_t cf_data_size = 2 * num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hn_data_size);
void *cf_output_data = malloc(cf_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_transform_origtensor(&cf_output_ztensor, cf_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
free(hn_output_data);
free(cf_output_data);
}
zDNN-1.1.2/samples/rnn_lstm_fwd.c 0000664 0000000 0000000 00000031050 15000221702 0016614 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// Sample: LSTM
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
/***********************************************************************
*
* LSTM (FWD/BWD):
*
* INPUTS --------------------------------------------------------------
* input | ZDNN_3DS | (num_timesteps, num_batches, num_features)
* h0 | ZDNN_3DS | (1, num_batches, num_hidden)
* c0 | ZDNN_3DS | (1, num_batches, num_hidden)
* weights | ZDNN_3DS | (1, num_features, num_hidden)
* biases | ZDNN_2DS | (1, num_hidden)
* hidden_weights | ZDNN_3DS | (1, num_hidden, num_hidden)
* hidden_biases | ZDNN_2DS | (1, num_hidden)
*
* OUTPUTS -------------------------------------------------------------
* hn_output | ZDNN_4DS | (num_timesteps, 1, num_batches, num_hidden)
* | | or (1, 1, num_batches, num_hidden)
* cf_output | ZDNN_4DS | (1, 1, num_batches, num_hidden)
***********************************************************************/
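// With the concrete values used below (num_timesteps = 5, num_batches = 3,
// num_features = 32, num_hidden = 5), only the last timestep is requested,
// so both hn_output and cf_output are (1, 1, 3, 5).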
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
uint32_t num_hidden = 5;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = FWD;
uint8_t num_dirs = 1;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_BIASES | PREV_LAYER_NONE,
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_NONE,
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES | PREV_LAYER_NONE,
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create output zTensor
***********************************************************************/
// get only the last timestep, thus hn and cf can share descriptor
zdnn_tensor_desc hncf_pre_tfrmd_desc, hncf_tfrmd_desc;
zdnn_ztensor hn_output_ztensor, cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hncf_pre_tfrmd_desc, 1, 1,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc,
&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hncf_pre_tfrmd_desc, &hncf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status = zdnn_lstm(&input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, &hn_output_ztensor,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
uint64_t hncf_data_size = num_batches * num_hidden * element_size;
void *hn_output_data = malloc(hncf_data_size);
void *cf_output_data = malloc(hncf_data_size);
status = zdnn_transform_origtensor(&hn_output_ztensor, hn_output_data);
assert(status == ZDNN_OK);
status = zdnn_transform_origtensor(&cf_output_ztensor, cf_output_data);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output_ztensor);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(input_data);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
free(hn_output_data);
free(cf_output_data);
}
zDNN-1.1.2/samples/rnn_lstm_multi_layers.c 0000664 0000000 0000000 00000033356 15000221702 0020560 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
void do_bidir_layer(zdnn_ztensor *input, uint32_t num_hidden,
zdnn_ztensor *hn_output, bool is_prev_layer_bidir) {
zdnn_status status;
uint32_t num_batches = input->pre_transformed_desc->dim2;
// if input is bidir output from previous layer then number of features for
// this layer is 2x of hidden-state size (dim1) of the previous layer
uint32_t num_features =
input->pre_transformed_desc->dim1 * (is_prev_layer_bidir ? 2 : 1);
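// e.g. for the second layer driven from main() below, the previous bidir
// layer produced num_hidden[0] = 5 per direction, so this layer sees
// num_features = 2 * 5 = 10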
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
lstm_gru_direction dir = BIDIR;
uint8_t num_dirs = 2;
/***********************************************************************
* Create initial hidden and cell state zTensors
***********************************************************************/
zdnn_tensor_desc h0c0_pre_tfrmd_desc, h0c0_tfrmd_desc;
zdnn_ztensor h0, c0;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &h0c0_pre_tfrmd_desc, num_dirs,
num_batches, num_hidden);
status =
zdnn_generate_transformed_desc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&h0);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&h0c0_pre_tfrmd_desc, &h0c0_tfrmd_desc,
&c0);
assert(status == ZDNN_OK);
uint64_t h0c0_data_size = num_batches * num_hidden * element_size;
void *hidden_state_data = malloc(h0c0_data_size);
void *cell_state_data = malloc(h0c0_data_size);
status = zdnn_transform_ztensor(&h0, hidden_state_data);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&c0, cell_state_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create input weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc weights_pre_tfrmd_desc, weights_tfrmd_desc;
zdnn_ztensor weights;
// if using previous layer bidir output as input, generate the concatenated
// descriptor with PREV_LAYER_BIDIR so the transform accounts for the doubled
// feature size
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &weights_pre_tfrmd_desc,
num_dirs, num_features, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&weights_pre_tfrmd_desc,
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI) |
RNN_TYPE_LSTM | USAGE_WEIGHTS,
&weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&weights_pre_tfrmd_desc,
&weights_tfrmd_desc, &weights);
assert(status == ZDNN_OK);
uint64_t weights_data_size = num_features * num_hidden * element_size;
void *weights_data_f = malloc(weights_data_size);
void *weights_data_i = malloc(weights_data_size);
void *weights_data_c = malloc(weights_data_size);
void *weights_data_o = malloc(weights_data_size);
status = zdnn_transform_ztensor(&weights, weights_data_f, weights_data_i,
weights_data_c, weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc biases_pre_tfrmd_desc, biases_tfrmd_desc;
zdnn_ztensor biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_BIASES |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&biases_pre_tfrmd_desc,
&biases_tfrmd_desc, &biases);
assert(status == ZDNN_OK);
uint64_t biases_data_size = num_hidden * element_size;
void *biases_data_f = malloc(biases_data_size);
void *biases_data_i = malloc(biases_data_size);
void *biases_data_c = malloc(biases_data_size);
void *biases_data_o = malloc(biases_data_size);
status = zdnn_transform_ztensor(&biases, biases_data_f, biases_data_i,
biases_data_c, biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden weights zTensor
* Resultant zTensor is concatenated
***********************************************************************/
zdnn_tensor_desc hidden_weights_pre_tfrmd_desc, hidden_weights_tfrmd_desc;
zdnn_ztensor hidden_weights;
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &hidden_weights_pre_tfrmd_desc,
num_dirs, num_hidden, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_weights_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&hidden_weights_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hidden_weights_pre_tfrmd_desc,
&hidden_weights_tfrmd_desc,
&hidden_weights);
assert(status == ZDNN_OK);
uint64_t hidden_weights_data_size = num_hidden * num_hidden * element_size;
void *hidden_weights_data_f = malloc(hidden_weights_data_size);
void *hidden_weights_data_i = malloc(hidden_weights_data_size);
void *hidden_weights_data_c = malloc(hidden_weights_data_size);
void *hidden_weights_data_o = malloc(hidden_weights_data_size);
status = zdnn_transform_ztensor(&hidden_weights, hidden_weights_data_f,
hidden_weights_data_i, hidden_weights_data_c,
hidden_weights_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create hidden biases zTensors
* Resultant zTensors are concatenated
***********************************************************************/
zdnn_tensor_desc hidden_biases_pre_tfrmd_desc, hidden_biases_tfrmd_desc;
zdnn_ztensor hidden_biases;
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, &hidden_biases_pre_tfrmd_desc,
num_dirs, num_hidden);
status = zdnn_generate_transformed_desc_concatenated(
&hidden_biases_pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI),
&hidden_biases_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(
&hidden_biases_pre_tfrmd_desc, &hidden_biases_tfrmd_desc, &hidden_biases);
assert(status == ZDNN_OK);
uint64_t hidden_biases_data_size = num_hidden * element_size;
void *hidden_biases_data_f = malloc(hidden_biases_data_size);
void *hidden_biases_data_i = malloc(hidden_biases_data_size);
void *hidden_biases_data_c = malloc(hidden_biases_data_size);
void *hidden_biases_data_o = malloc(hidden_biases_data_size);
status = zdnn_transform_ztensor(&hidden_biases, hidden_biases_data_f,
hidden_biases_data_i, hidden_biases_data_c,
hidden_biases_data_o);
assert(status == ZDNN_OK);
/***********************************************************************
* Create cf output zTensor
***********************************************************************/
zdnn_tensor_desc cf_pre_tfrmd_desc, cf_tfrmd_desc;
zdnn_ztensor cf_output_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &cf_pre_tfrmd_desc, 1, 2,
num_batches, num_hidden);
status = zdnn_generate_transformed_desc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&cf_pre_tfrmd_desc, &cf_tfrmd_desc,
&cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Call the zAIU
***********************************************************************/
void *work_area = NULL;
status =
zdnn_lstm(input, &h0, &c0, &weights, &biases, &hidden_weights,
&hidden_biases, dir, work_area, hn_output, &cf_output_ztensor);
assert(status == ZDNN_OK);
/***********************************************************************
* Cleanup and Return
***********************************************************************/
status = zdnn_free_ztensor_buffer(&h0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&c0);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_weights);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hidden_biases);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&cf_output_ztensor);
assert(status == ZDNN_OK);
free(hidden_state_data);
free(cell_state_data);
free(weights_data_f);
free(weights_data_i);
free(weights_data_c);
free(weights_data_o);
free(hidden_weights_data_f);
free(hidden_weights_data_i);
free(hidden_weights_data_c);
free(hidden_weights_data_o);
free(biases_data_f);
free(biases_data_i);
free(biases_data_c);
free(biases_data_o);
free(hidden_biases_data_f);
free(hidden_biases_data_i);
free(hidden_biases_data_c);
free(hidden_biases_data_o);
}
// Sample: LSTM multi-layer BIDIR
int main(int argc, char *argv[]) {
zdnn_status status;
#ifdef STATIC_LIB
zdnn_init();
#endif
uint32_t num_hidden[2] = {5, 4};
/***********************************************************************
* Create input zTensor
***********************************************************************/
zdnn_tensor_desc input_pre_tfrmd_desc, input_tfrmd_desc;
zdnn_ztensor input;
uint32_t num_timesteps = 5;
uint32_t num_batches = 3;
uint32_t num_features = 32;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
zdnn_init_pre_transformed_desc(ZDNN_3DS, type, &input_pre_tfrmd_desc,
num_timesteps, num_batches, num_features);
status =
zdnn_generate_transformed_desc(&input_pre_tfrmd_desc, &input_tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&input_pre_tfrmd_desc,
&input_tfrmd_desc, &input);
assert(status == ZDNN_OK);
uint64_t input_data_size =
num_timesteps * num_batches * num_features * element_size;
void *input_data = malloc(input_data_size);
status = zdnn_transform_ztensor(&input, input_data);
assert(status == ZDNN_OK);
/***********************************************************************
* Create 2 hn output zTensors
***********************************************************************/
zdnn_tensor_desc hn_pre_tfrmd_desc[2], hn_tfrmd_desc[2];
zdnn_ztensor hn_output[2];
for (int i = 0; i < 2; i++) {
zdnn_init_pre_transformed_desc(ZDNN_4DS, type, &hn_pre_tfrmd_desc[i],
num_timesteps, 2, num_batches,
num_hidden[i]);
status = zdnn_generate_transformed_desc(&hn_pre_tfrmd_desc[i],
&hn_tfrmd_desc[i]);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(&hn_pre_tfrmd_desc[i],
&hn_tfrmd_desc[i], &hn_output[i]);
assert(status == ZDNN_OK);
}
/***********************************************************************
* Do the layers
***********************************************************************/
// call the first layer with input, previous layer bidir = false, output goes
// to hn_output[0]
do_bidir_layer(&input, num_hidden[0], &hn_output[0], false);
// call the second layer with hn_output[0] from layer 1, previous layer bidir
// = true, output goes to hn_output[1]
do_bidir_layer(&hn_output[0], num_hidden[1], &hn_output[1], true);
/***********************************************************************
* Output and Cleanup
***********************************************************************/
void *hn_output_data[2];
for (int i = 0; i < 2; i++) {
uint64_t hn_output_data_size = (uint64_t)num_timesteps * num_batches *
num_hidden[i] * 2 * element_size;
hn_output_data[i] = malloc(hn_output_data_size);
status = zdnn_transform_origtensor(&hn_output[i], hn_output_data[i]);
assert(status == ZDNN_OK);
}
status = zdnn_free_ztensor_buffer(&input);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output[0]);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&hn_output[1]);
assert(status == ZDNN_OK);
free(input_data);
free(hn_output_data[0]);
free(hn_output_data[1]);
}
zDNN-1.1.2/samples/simple_add.c 0000664 0000000 0000000 00000007013 15000221702 0016223 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Create 2 zTensors a and b, and add them together via zdnn_add()
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor_a;
zdnn_ztensor ztensor_b;
zdnn_ztensor ztensor_out;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
#ifdef STATIC_LIB
zdnn_init();
#endif
// allocate tensor data storage
void *data1 = malloc(num_elements * element_size);
void *data2 = malloc(num_elements * element_size);
void *data_out = malloc(num_elements * element_size);
// read input_data
// check status for zAIU availability, supported ops, etc. here
// status = zdnn_query();
// set input tensor data to 0 to 127 sequentially and repeat
for (uint64_t i = 0; i < num_elements; i++) {
((float *)data1)[i] = (float)(i & 0x7f);
((float *)data2)[i] = (float)(i & 0x7f);
}
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
// generate transformed shape information
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
assert(status == ZDNN_OK);
// initialize zTensors and allocate 4k-aligned storage via helper function
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_a);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_b);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor_out);
assert(status == ZDNN_OK);
// transform the feature tensor
status = zdnn_transform_ztensor(&ztensor_a, data1);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor_b, data2);
assert(status == ZDNN_OK);
// perform element-wise add between the two input tensors
status = zdnn_add(&ztensor_a, &ztensor_b, &ztensor_out);
assert(status == ZDNN_OK);
// transform resultant zTensor back to original data format
status = zdnn_transform_origtensor(&ztensor_out, data_out);
assert(status == ZDNN_OK);
for (uint64_t i = 0; i < num_elements; i++) {
printf("out element %" PRIu64 " %f\n", i, ((float *)data_out)[i]);
}
// Free zTensors
status = zdnn_free_ztensor_buffer(&ztensor_a);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&ztensor_b);
assert(status == ZDNN_OK);
status = zdnn_free_ztensor_buffer(&ztensor_out);
assert(status == ZDNN_OK);
free(data1);
free(data2);
free(data_out);
}
zDNN-1.1.2/samples/simple_concat_lstm.c 0000664 0000000 0000000 00000004524 15000221702 0020005 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// CONCAT_LSTM usage
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc *pre_tfrmd_desc, *tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim2 = 32, dim1 = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim2 * dim1;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data_forget = malloc(num_elements * element_size),
*data_input = malloc(num_elements * element_size),
*data_cell = malloc(num_elements * element_size),
*data_output = malloc(num_elements * element_size);
pre_tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
zdnn_init_pre_transformed_desc(ZDNN_2DS, type, pre_tfrmd_desc, dim2, dim1);
status = zdnn_generate_transformed_desc_concatenated(pre_tfrmd_desc,
CONCAT_LSTM, tfrmd_desc);
assert(status == ZDNN_OK);
ztensor.pre_transformed_desc = pre_tfrmd_desc;
ztensor.transformed_desc = tfrmd_desc;
status = zdnn_allochelper_ztensor(&ztensor);
assert(status == ZDNN_OK);
// gate buffers must be supplied in Forget, Input, Cell, Output (FICO) order
status = zdnn_transform_ztensor(&ztensor, data_forget, data_input, data_cell,
data_output);
assert(status == ZDNN_OK);
free(pre_tfrmd_desc);
free(tfrmd_desc);
free(data_forget);
free(data_input);
free(data_cell);
free(data_output);
}
zDNN-1.1.2/samples/simple_heap.c 0000664 0000000 0000000 00000003636 15000221702 0016417 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Descriptor allocation on heap
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc *pre_tfrmd_desc, *tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data = malloc(num_elements * element_size);
pre_tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
tfrmd_desc = malloc(sizeof(zdnn_tensor_desc));
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
status = zdnn_generate_transformed_desc(pre_tfrmd_desc, tfrmd_desc);
assert(status == ZDNN_OK);
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, &ztensor);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor, data);
assert(status == ZDNN_OK);
free(pre_tfrmd_desc);
free(tfrmd_desc);
free(data);
}
zDNN-1.1.2/samples/simple_quantized_tensor.c 0000664 0000000 0000000 00000004714 15000221702 0021076 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Create a quantized zTensor
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
// allocate tensor data storage
void *data1 = malloc(num_elements * element_size);
// read input_data
// check status for zAIU availability, supported ops, etc. here
// status = zdnn_query();
// set input tensor data to 0 to 127 sequentially and repeat
for (uint64_t i = 0; i < num_elements; i++) {
((float *)data1)[i] = (float)(i & 0x7f);
}
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
float scale = 3;
float offset = 2;
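  // with scale 3 and offset 2, a real value r is expected to quantize to
  // roughly round(r / 3 + 2) and to dequantize back as (q - 2) * 3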
// generate transformed shape information
status = zdnn_generate_quantized_transformed_desc(
&pre_tfrmd_desc, QUANTIZED_DLFLOAT16, &tfrmd_desc);
assert(status == ZDNN_OK);
// initialize zTensors and allocate 4k-aligned storage via helper function
status = zdnn_init_quantized_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc,
scale, offset, &ztensor);
assert(status == ZDNN_OK);
// transform the feature tensor
status = zdnn_transform_ztensor(&ztensor, data1);
assert(status == ZDNN_OK);
// Free zTensors
status = zdnn_free_ztensor_buffer(&ztensor);
assert(status == ZDNN_OK);
free(data1);
} zDNN-1.1.2/samples/simple_stack.c 0000664 0000000 0000000 00000003425 15000221702 0016603 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Descriptor allocation on stack
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data = malloc(num_elements * element_size);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor, data);
assert(status == ZDNN_OK);
free(data);
}
zDNN-1.1.2/samples/stickify_unstickify.c 0000664 0000000 0000000 00000003764 15000221702 0020230 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Transform raw tensor data to zTensor, then transform the zTensor back
// to original format
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data = malloc(num_elements * element_size);
void *data_out = malloc(num_elements * element_size);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor, data);
assert(status == ZDNN_OK);
status = zdnn_transform_origtensor(&ztensor, data_out);
assert(status == ZDNN_OK);
free(data);
free(data_out);
}
zDNN-1.1.2/samples/ztensor_reuse.c 0000664 0000000 0000000 00000004102 15000221702 0017025 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "zdnn.h"
// ***************************************************************************
// Sample:
//
// Transform 2 pieces of raw tensor data using the same zdnn_ztensor struct
// ***************************************************************************
int main(int argc, char *argv[]) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
uint32_t dim_n = 1, dim_h = 32, dim_w = 32, dim_c = 3;
zdnn_data_types type = FP32;
short element_size = 4; // size of each element in bytes
uint64_t num_elements = dim_n * dim_h * dim_w * dim_c;
#ifdef STATIC_LIB
zdnn_init();
#endif
void *data1 = malloc(num_elements * element_size);
void *data2 = malloc(num_elements * element_size);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim_n, dim_h,
dim_w, dim_c);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
assert(status == ZDNN_OK);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
assert(status == ZDNN_OK);
status = zdnn_transform_ztensor(&ztensor, data1);
assert(status == ZDNN_OK);
zdnn_reset_ztensor(&ztensor);
// essentially overwriting previous stickification buffer with data2's
status = zdnn_transform_ztensor(&ztensor, data2);
assert(status == ZDNN_OK);
free(data1);
free(data2);
}
zDNN-1.1.2/tests/ 0000775 0000000 0000000 00000000000 15000221702 0013453 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/tests/Makefile 0000664 0000000 0000000 00000004554 15000221702 0015123 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
THIRDPARTY_ROOT = third_party
UNITY_ROOT ?= $(THIRDPARTY_ROOT)/Unity
OBJDIR := obj
BINDIR := bin
_dummy := $(shell mkdir -p $(OBJDIR); mkdir -p $(BINDIR))
include ../config.make
INCDIR := $(CFLAGS_NOSEARCH) -I ../zdnn -I $(UNITY_ROOT)/src
ifneq ($(CC),xlc)
ifneq ($(no_rpath),1)
LDFLAGS := $(LDFLAGS) -Wl,-rpath=\$$ORIGIN/../../zdnn/${SODIR}
endif
endif
C_TEST_SUPPORTFILES := $(UNITY_ROOT)/src/unity.c testsupport.c $(wildcard common_*.c)
CXX_TEST_SUPPORTFILES := $(wildcard *.cpp)
TEST_FILES := $(wildcard testDriver*.c)
C_TEST_SUPPORTOBJ := $(patsubst %.c,$(OBJDIR)/%.o,$(notdir $(C_TEST_SUPPORTFILES)))
CXX_TEST_SUPPORTOBJ := $(patsubst %.cpp,$(OBJDIR)/%.o,$(notdir $(CXX_TEST_SUPPORTFILES)))
TEST_OBJ := $(patsubst %.c,$(OBJDIR)/%.o,$(TEST_FILES))
TEST_BINARIES := $(patsubst %.c,$(BINDIR)/%,$(TEST_FILES))
TEST_RESULTS := $(patsubst %.c,$(BINDIR)/%.txt,$(TEST_FILES))
PARSED_RESULT:= `python3 resources/testresult_parser.py`
all: test
.PHONY: test
test: $(TEST_RESULTS) $(TEST_BINARIES) $(TEST_OBJ) $(C_TEST_SUPPORTOBJ) $(CXX_TEST_SUPPORTOBJ)
@echo $(ECHOFLAGS) ${PARSED_RESULT}
# Compile
$(OBJDIR)/%.o: $(UNITY_ROOT)/src/%.c
$(CC) $(INCDIR) $(CFLAGS) -c -o $@ $<
$(OBJDIR)/%.o: $(ARGTABLE3_ROOT)/src/%.c
$(CC) $(INCDIR) $(CFLAGS) -c -o $@ $<
$(OBJDIR)/%.o: %.c
$(CC) $(INCDIR) $(CFLAGS) -c -o $@ $<
$(OBJDIR)/%.o: %.cpp
$(CXX) $(INCDIR) $(CXXFLAGS) -c -o $@ $<
# Link
$(BINDIR)/testDriver_%: $(OBJDIR)/testDriver_%.o $(C_TEST_SUPPORTOBJ) $(CXX_TEST_SUPPORTOBJ)
$(CXX) $(INCDIR) $(CXXFLAGS) -o $@ $< $(C_TEST_SUPPORTOBJ) $(CXX_TEST_SUPPORTOBJ) $(LDFLAGS) $(LDFLAGS_TEST)
# Run testcase
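# (the leading '-' lets make keep going even when a test binary exits non-zero;
# LD_PATH_VAR is expected to name the platform's shared-library search path
# variable, e.g. LD_LIBRARY_PATH, and ZDNN_LOGLEVEL=off silences zDNN logging)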
$(BINDIR)/%.txt: $(BINDIR)/%
-$(LD_PATH_VAR)=../zdnn/$(SODIR) ZDNN_LOGLEVEL=off ./$< > $@
.PHONY: clean
clean:
$(RM) $(OBJDIR)/* *~ core
$(RM) $(BINDIR)/* *~ core
zDNN-1.1.2/tests/common_act.h 0000664 0000000 0000000 00000001473 15000221702 0015750 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_COMMON_ACT_H_
#define TESTS_COMMON_ACT_H_
#include "testsupport.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif /* TESTS_COMMON_ACT_H_ */
zDNN-1.1.2/tests/common_elwise.c 0000664 0000000 0000000 00000027340 15000221702 0016465 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
/**
* helper function to compute the natural log without using math.h
*/
float ln(float x) {
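  // Series expansion: ln(x) = 2 * artanh((x-1)/(x+1))
  //                         = 2 * (u + u^3/3 + u^5/5 + ...), with u = (x-1)/(x+1);
  // terms are accumulated until the sum stops changing in float precision.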
float old_sum = 0.0;
float xmlxpl = (x - 1) / (x + 1);
float xmlxpl_2 = xmlxpl * xmlxpl;
float denom = 1.0;
float frac = xmlxpl;
float term = frac; // denom start from 1.0
float sum = term;
while (sum != old_sum) {
old_sum = sum;
denom += 2.0;
frac *= xmlxpl_2;
sum += frac / denom;
}
return 2.0 * sum;
}
/**
* Helper function to compute output tensor values using elementwise
* natural log
*/
void elwise_log(float input[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
if (input[i] > 0) {
switch (type) {
case (BFLOAT):
output[i] = ln(CLEANSE_BFLOAT(input[i]));
break;
case (FP16):
output[i] = ln(CLEANSE_FP16(input[i]));
break;
case (FP32):
output[i] = ln(CLEANSE_FP32(input[i]));
break;
default:
break;
}
}
}
}
/**
* Helper function to compute output tensor values using elementwise
* exponential
*/
void elwise_exp(float input[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = exp(CLEANSE_BFLOAT(input[i]));
break;
case (FP16):
output[i] = exp(CLEANSE_FP16(input[i]));
break;
case (FP32):
output[i] = exp(CLEANSE_FP32(input[i]));
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise add
*/
void elwise_add(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = CLEANSE_BFLOAT(input1[i]) + CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = CLEANSE_FP16(input1[i]) + CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = CLEANSE_FP32(input1[i]) + CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise sub
*/
void elwise_sub(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = CLEANSE_BFLOAT(input1[i]) - CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = CLEANSE_FP16(input1[i]) - CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = CLEANSE_FP32(input1[i]) - CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise
* division
*/
void elwise_div(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = CLEANSE_BFLOAT(input1[i]) / CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = CLEANSE_FP16(input1[i]) / CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = CLEANSE_FP32(input1[i]) / CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise
* multiplication
*/
void elwise_mul(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = CLEANSE_BFLOAT(input1[i]) * CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = CLEANSE_FP16(input1[i]) * CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = CLEANSE_FP32(input1[i]) * CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise
* minimum
*/
void elwise_min(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = (input1[i] < input2[i]) ? CLEANSE_BFLOAT(input1[i])
: CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = (input1[i] < input2[i]) ? CLEANSE_FP16(input1[i])
: CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = (input1[i] < input2[i]) ? CLEANSE_FP32(input1[i])
: CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to compute output tensor values using elementwise
* maximum
*/
void elwise_max(float input1[], float input2[], float output[], int num_elems,
zdnn_data_types type) {
for (int i = 0; i < num_elems; i++) {
switch (type) {
case (BFLOAT):
output[i] = (input1[i] > input2[i]) ? CLEANSE_BFLOAT(input1[i])
: CLEANSE_BFLOAT(input2[i]);
break;
case (FP16):
output[i] = (input1[i] > input2[i]) ? CLEANSE_FP16(input1[i])
: CLEANSE_FP16(input2[i]);
break;
case (FP32):
output[i] = (input1[i] > input2[i]) ? CLEANSE_FP32(input1[i])
: CLEANSE_FP32(input2[i]);
break;
default:
break;
}
}
}
/**
* Helper function to run end to end elementwise tests that only have
* one input tensor
*/
void test_elwise_api_1_input(uint32_t *shape, zdnn_data_layouts layout,
float *input_values,
nnpa_function_code function_code,
zdnn_status expected_status) {
// Create ztensor with input_values
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, input_values);
// Create output ztensor initialized to 0's
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
// calculate number of values in each tensor buffer for helper function
uint64_t num_elements = get_num_elements(output_ztensor, ELEMENTS_PRE);
// Values in ZDNN_NHWC order
float expected_values[num_elements];
char api_method[AIU_METHOD_STR_LENGTH] = "zdnn_";
zdnn_status status = GENERAL_TESTCASE_FAILURE;
switch (function_code) {
case NNPA_LOG:
strcpy(api_method, "zdnn_log");
// Use public zDNN method to make NNPA call to zAIU
status = zdnn_log(input_ztensor, output_ztensor);
// fill expected_values array with calculated expected values using
// helper function
elwise_log(input_values, expected_values, num_elements, test_datatype);
break;
case NNPA_EXP:
strcpy(api_method, "zdnn_exp");
// Use public zDNN method to make NNPA call to zAIU
status = zdnn_exp(input_ztensor, output_ztensor);
// fill expected_values array with calculated expected values using
// helper function
elwise_exp(input_values, expected_values, num_elements, test_datatype);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("unsupported function_code: %d", function_code);
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to %s() to returned status %08x but expected %08x", api_method,
status, expected_status);
// Only check expected values if expected status is ZDNN_OK
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// Cleanup test tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/**
* Helper function to run end to end elementwise tests that only have
* two input tensors. This version allows the user to select
* which type (FP32, Bfloat or FP16) they are testing.
*/
void test_elwise_api_2_inputs_adv(uint32_t *shape, zdnn_data_layouts layout,
zdnn_data_types type, float *input1_values,
float *input2_values,
nnpa_function_code function_code,
zdnn_status expected_status) {
// Create ztensor with input1_values
zdnn_ztensor *input1_ztensor = alloc_ztensor_with_values(
shape, layout, type, NO_CONCAT, false, input1_values);
// Create ztensor with input2_values
zdnn_ztensor *input2_ztensor = alloc_ztensor_with_values(
shape, layout, type, NO_CONCAT, false, input2_values);
// Create output ztensor initialized to 0's
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, type, NO_CONCAT, true, ZERO_ARRAY);
// calculate number of values in each tensor buffer for helper function
uint64_t num_elements = get_num_elements(output_ztensor, ELEMENTS_PRE);
// Values in ZDNN_NHWC order
float expected_values[num_elements];
char api_method[AIU_METHOD_STR_LENGTH];
zdnn_status status = GENERAL_TESTCASE_FAILURE;
// Use public zDNN method to make NNPA call to zAIU
// then fill expected_values array with calculated expected values using
// helper function if we expect to succeed. Otherwise don't bother.
#define CASE(func_code, func_name) \
case func_code: \
strcpy(api_method, "zdnn_" #func_name); \
status = zdnn_##func_name(input1_ztensor, input2_ztensor, output_ztensor); \
elwise_##func_name(input1_values, input2_values, expected_values, \
num_elements, type); \
break;
switch (function_code) {
CASE(NNPA_MAX, max)
CASE(NNPA_MIN, min)
CASE(NNPA_ADD, add)
CASE(NNPA_SUB, sub)
CASE(NNPA_MUL, mul)
CASE(NNPA_DIV, div)
default:
TEST_FAIL_MESSAGE_FORMATTED("unsupported function_code: %d", function_code);
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to %s() to returned status %08x but expected %08x", api_method,
status, expected_status);
// Only check expected values if expected status is ZDNN_OK
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// Cleanup test tensor buffers
free_ztensor_buffers(3, input1_ztensor, input2_ztensor, output_ztensor);
}
/**
* Helper function to run end to end elementwise tests that only have
 * two input tensors. This version uses the global test_datatype (one of
 * FP32, BFLOAT or FP16, as selected by the test harness) and delegates to
 * test_elwise_api_2_inputs_adv.
*/
void test_elwise_api_2_inputs(uint32_t *shape, zdnn_data_layouts layout,
float *input1_values, float *input2_values,
nnpa_function_code function_code,
zdnn_status expected_status) {
test_elwise_api_2_inputs_adv(shape, layout, test_datatype, input1_values,
input2_values, function_code, expected_status);
}
zDNN-1.1.2/tests/common_elwise.h 0000664 0000000 0000000 00000003147 15000221702 0016471 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_COMMON_ELWISE_H_
#define TESTS_COMMON_ELWISE_H_
#include "testsupport.h"
#include <math.h>
void test_elwise_api_1_input(uint32_t *shape, zdnn_data_layouts layout,
float *input_values,
nnpa_function_code function_code,
zdnn_status expected_status);
void test_elwise_api_2_inputs(uint32_t *shape, zdnn_data_layouts layout,
float *input1_values, float *input2_values,
nnpa_function_code function_code,
zdnn_status expected_status);
void test_elwise_api_2_inputs_adv(uint32_t *shape, zdnn_data_layouts layout,
zdnn_data_types type, float *input1_values,
float *input2_values,
nnpa_function_code function_code,
zdnn_status expected_status);
#endif /* TESTS_COMMON_ELWISE_H_ */
zDNN-1.1.2/tests/common_pool.c 0000664 0000000 0000000 00000007057 15000221702 0016151 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_pool.h"
/// Calls the public API and checks that returned status and values match expected.
///
/// \return nothing but throws test failure if the actual status doesn't
/// match expected. An error is also thrown if expected status is ZDNN_OK but
/// actual output values do not match the expected values.
///
void test_pool_function(nnpa_function_code function_code, uint32_t *input_shape,
zdnn_data_layouts input_layout,
bool repeat_first_input_value, float *input_values,
zdnn_pool_padding padding_type, uint32_t kernel_height,
uint32_t kernel_width, uint32_t stride_height,
uint32_t stride_width, uint32_t *output_shape,
zdnn_data_layouts output_layout,
zdnn_status expected_status,
bool repeat_first_expected_value,
float *expected_values) {
// Create input and output ztensors
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_shape, input_layout, test_datatype, NO_CONCAT,
repeat_first_input_value, input_values);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, output_layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
char api_method[AIU_METHOD_STR_LENGTH] = "zdnn_";
zdnn_status status = GENERAL_TESTCASE_FAILURE;
// Call public NNPA method
switch (function_code) {
case NNPA_AVGPOOL2D:
strcpy(api_method, "zdnn_avgpool2d");
status =
zdnn_avgpool2d(input_ztensor, padding_type, kernel_height, kernel_width,
stride_height, stride_width, output_ztensor);
break;
case NNPA_MAXPOOL2D:
strcpy(api_method, "zdnn_maxpool2d");
status =
zdnn_maxpool2d(input_ztensor, padding_type, kernel_height, kernel_width,
stride_height, stride_width, output_ztensor);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("unsupported function_code: %d", function_code);
break;
}
// Assert returned status matches expected
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to %s() to returned status %08x \"%s\" but expected %08x \"%s\"",
api_method, status, zdnn_get_status_message(status), expected_status,
zdnn_get_status_message(expected_status));
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
  default:
    // should never get here
    break;
}
// If expected status is ZDNN_OK, assert output values matches expected
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, repeat_first_expected_value,
expected_values, *tol);
}
// Cleanup test ztensors
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
zDNN-1.1.2/tests/common_pool.h 0000664 0000000 0000000 00000003353 15000221702 0016151 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_COMMON_POOL_H_
#define TESTS_COMMON_POOL_H_
#include "testsupport.h"
#include <string.h>
// Restrictions placed on pooling ops. If they're changed, update the API
// documentation for all pool (avg, max, meanreduce2d) ops!
#define MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE 1024
#define MAXIMUM_POOL_NONZERO_STRIDES_HEIGHT_WIDTH 1024
#define MAXIMUM_POOL_NONZERO_STRIDES_KERNEL_SIZE 64
#define MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE 30
void test_pool_function(nnpa_function_code function_code, uint32_t *input_shape,
zdnn_data_layouts input_layout,
bool repeat_first_input_value, float *input_values,
zdnn_pool_padding padding_type, uint32_t kernel_height,
uint32_t kernel_width, uint32_t stride_height,
uint32_t stride_width, uint32_t *output_shape,
zdnn_data_layouts output_layout,
zdnn_status expected_status,
bool repeat_first_expected_value,
float *expected_values);
#endif /* TESTS_COMMON_POOL_H_ */
zDNN-1.1.2/tests/common_quantization.c 0000664 0000000 0000000 00000042017 15000221702 0017721 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_quantization.h"
#include "convert.h"
#include "testsupport.h"
#include <math.h>
#include <string.h>
/// Creates a ztensor with the provided values. Values are converted to the
/// specified type. The resulting ztensor is transformed and ready for use in
/// zDNN operations.
///
/// \note This method does not check that the size of values matches expected
/// number of elements.
///
/// Example usage:
/// Setup input tensor
/// \code
/// ztensor *zt = alloc_quantized_ztensor_with_values(
/// shape, pre_tfrmd_layout, INT8, QUANTIZED_INT8, values, scale, offset);
/// \endcode
/// Setup Output tensor
/// \code
/// ztensor *zt = alloc_quantized_ztensor_with_values(
/// shape, pre_tfrmd_layout, ZDNN_DLFLOAT16, QUANTIZED_DLFLOAT16, NULL,
/// scale, offset);
/// \endcode
///
/// \param[in] shape array of dimensions
/// \param[in] pre_tfrmd_layout pre-transformed data layout
/// \param[in] type data type
/// \param[in] transform_type quantized data type
/// \param[in] values_data float data
/// \param[in] scale quantization scale
/// \param[in] offset quantization offset (zero point)
///
/// \return zdnn_ztensor* Pointer to a malloc'd ztensor with transformed data
///
zdnn_ztensor *alloc_quantized_ztensor_with_values(
uint32_t *shape, zdnn_data_layouts pre_tfrmd_layout, zdnn_data_types type,
zdnn_quantized_transform_types transform_type, const float *values_data,
const float scale, const float offset) {
// Create the pretransformed description
zdnn_tensor_desc *pre_tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
switch (pre_tfrmd_layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0]);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1]);
break;
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1], shape[2]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"pre_tfrmd_layout. Could you teach me?",
get_data_layout_str(pre_tfrmd_layout));
break;
}
// Create the transformed description
zdnn_tensor_desc *tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
zdnn_status status = zdnn_generate_quantized_transformed_desc(
pre_tfrmd_desc, transform_type, tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc failed (status = %08x)", status);
// Create the ztensor with malloc'd buffer pointer
zdnn_ztensor *ztensor = (zdnn_ztensor *)malloc(sizeof(zdnn_ztensor));
status = zdnn_init_quantized_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc,
scale, offset, ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
if (transform_type == QUANTIZED_INT8) {
status = zdnn_transform_quantized_ztensor(ztensor, false, INT8_MIN,
INT8_MAX, values_data);
} else if (transform_type == QUANTIZED_WEIGHTS_INT8) {
size_t num_elements =
tfrmd_desc->dim4 * tfrmd_desc->dim2 * tfrmd_desc->dim1;
int8_t quant_data[num_elements];
for (size_t i = 0; i < num_elements; ++i) {
quant_data[i] = QUANTIZE(values_data[i], scale, offset);
}
status = zdnn_transform_quantized_ztensor(ztensor, false, INT8_MIN,
INT8_MAX, quant_data);
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_quantized_ztensor failed with status %08x \"%s\"", status,
zdnn_get_status_message(status));
return ztensor;
}
/// Asserts each value in the stickified ztensor is within 1.0 of the given
/// expected float values.
///
/// \note This method does not check that the size of values array matches the
/// number of elements. If there's not enough expected values, the test will
/// likely fail when garbage data is pulled in as the expected value.
///
/// Example usage:
/// \code
/// assert_quantized_ztensor_values(&ztensor, false, values);
/// \endcode
///
/// \param[in] ztensor pointer to zdnn_ztensor with actual values
/// \param[in] repeat_first_expected_value if true, all ztensor values will be
/// compared to values[0]
/// \param[in] expected_vals array of expected quantized values
///
/// \return None (assert fails if any actual value not within expected range)
///
void assert_quantized_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals) {
zdnn_status status;
zdnn_tensor_desc *pre_tfrmd_desc = ztensor->pre_transformed_desc;
uint64_t num_elements = 0;
switch (ztensor->transformed_desc->layout) {
case ZDNN_1D:
case ZDNN_2D:
case ZDNN_2DS:
case ZDNN_3D:
case ZDNN_3DS:
case ZDNN_4D:
case ZDNN_4DS:
case ZDNN_NHWC:
num_elements = get_num_elements(ztensor, ELEMENTS_PRE);
break;
case ZDNN_FICO:
case ZDNN_ZRH:
TEST_FAIL_MESSAGE_FORMATTED(
"does not support %s layout as we don't support unstickifying "
"concatenated ztensors.",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"layout. Could you teach me?",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
}
// Malloc error_message as it will be large if num_elements is large.
uint64_t big_error_message_size =
(uint64_t)sizeof(char) * ERROR_MESSAGE_STR_LENGTH * num_elements;
char *error_msg = malloc(big_error_message_size);
float *actual_vals;
// Get unstickified data from ztensor to actual_vals[]
actual_vals = malloc(num_elements * get_data_type_size(pre_tfrmd_desc->type));
status = zdnn_transform_origtensor(ztensor, actual_vals);
snprintf(error_msg, big_error_message_size,
"zdnn_transform_origtensor failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(status == ZDNN_OK, error_msg);
  // Assert the ztensor's values (converted back to floats) match the
  // expected values within tolerance
bool all_pass = true;
// Loop appends to error_msg so reset it first
error_msg[0] = '\0';
char *error_fmt = "Element %" PRIu64 " == %f expecting %f";
char *error_fmt2 = " <==== FAILED (diff beyond 1.0)";
  // Compare the actual and expected values
for (uint64_t i = 0; i < num_elements; i++) {
bool is_almost_equal = false;
switch (pre_tfrmd_desc->type) {
case FP32: {
float actual = actual_vals[i];
float expected = expected_vals[i];
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i, actual,
expected);
LOG_DEBUG(error_fmt, i, actual, expected);
is_almost_equal = fabs(fabs(actual) - fabs(expected)) <= 1.f;
break;
}
default:
// NOTE: Along with undefined types, DLFLOAT types will also come down
// this path. DLFLOATS are a stickified types which are not valid types
// for the pre_tfrmd_desc (ie prestickifed description).
snprintf(error_msg, big_error_message_size, "unsupported type: %d\n",
pre_tfrmd_desc->type);
TEST_FAIL_MESSAGE(error_msg);
break;
}
if (!is_almost_equal) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt2);
all_pass = false;
}
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
}
// Assert that all passed and clean up temp data
TEST_ASSERT_MESSAGE(all_pass, error_msg);
free(actual_vals);
free(error_msg);
}
/// Asserts each value in the stickified ztensor is within 1.0 of the given
/// expected float values.
///
/// \note This method does not check that the size of values array matches the
/// number of elements. If there's not enough expected values, the test will
/// likely fail when garbage data is pulled in as the expected value.
///
/// Example usage:
/// \code
/// assert_dequantized_ztensor_values(&ztensor, false, values);
/// \endcode
///
/// \param[in] ztensor pointer to zdnn_ztensor with actual values
/// \param[in] repeat_first_expected_value if true, all ztensor values will be
/// compared to values[0]
/// \param[in] expected_vals array of expected quantized values
///
/// \return None (assert fails if any actual value not within expected range)
///
void assert_dequantized_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals) {
zdnn_status status;
zdnn_tensor_desc *pre_tfrmd_desc = ztensor->pre_transformed_desc;
uint64_t num_elements = 0;
switch (ztensor->transformed_desc->layout) {
case ZDNN_1D:
case ZDNN_2D:
case ZDNN_2DS:
case ZDNN_3D:
case ZDNN_3DS:
case ZDNN_4D:
case ZDNN_4DS:
case ZDNN_NHWC:
num_elements = get_num_elements(ztensor, ELEMENTS_PRE);
break;
case ZDNN_FICO:
case ZDNN_ZRH:
TEST_FAIL_MESSAGE_FORMATTED(
"does not support %s layout as we don't support unstickifying "
"concatenated ztensors.",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"layout. Could you teach me?",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
}
// Malloc error_message as it will be large if num_elements is large.
uint64_t big_error_message_size =
(uint64_t)sizeof(char) * ERROR_MESSAGE_STR_LENGTH * num_elements;
char *error_msg = malloc(big_error_message_size);
float *actual_vals;
// Get unstickified data from ztensor to actual_vals[]
actual_vals = malloc(num_elements * get_data_type_size(pre_tfrmd_desc->type));
status = zdnn_transform_origtensor(ztensor, actual_vals);
snprintf(error_msg, big_error_message_size,
"zdnn_transform_origtensor failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(status == ZDNN_OK, error_msg);
  // Assert the ztensor's values (converted back to floats) match the
  // expected values within tolerance
bool all_pass = true;
// Loop appends to error_msg so reset it first
error_msg[0] = '\0';
char *error_fmt = "Element %" PRIu64 " == %f expecting %f";
char *error_fmt2 = " <==== FAILED (diff beyond 1.0)";
  // Compare the actual and expected values
for (uint64_t i = 0; i < num_elements; i++) {
bool is_almost_equal = false;
switch (pre_tfrmd_desc->type) {
case FP32: {
// expected values are quantized, so we need to quantized the dequantized
// actual values before comparison.
float actual =
QUANTIZE(actual_vals[i], (1.f / ztensor->rec_scale), ztensor->offset);
float expected = expected_vals[i];
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i, actual,
expected);
LOG_DEBUG(error_fmt, i, actual, expected);
is_almost_equal = fabs(fabs(actual) - fabs(expected)) <= 1.f;
break;
}
default:
// NOTE: Along with undefined types, DLFLOAT types will also come down
// this path. DLFLOATS are a stickified types which are not valid types
// for the pre_tfrmd_desc (ie prestickifed description).
snprintf(error_msg, big_error_message_size, "unsupported type: %d\n",
pre_tfrmd_desc->type);
TEST_FAIL_MESSAGE(error_msg);
break;
}
if (!is_almost_equal) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt2);
all_pass = false;
}
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
}
// Assert that all passed and clean up temp data
TEST_ASSERT_MESSAGE(all_pass, error_msg);
free(actual_vals);
free(error_msg);
}
/// Asserts that no more than 1% of the values differ from the expected values.
///
/// \note This method does not check that the size of values array matches the
/// number of elements. If there's not enough expected values, the test will
/// likely fail when garbage data is pulled in as the expected value.
///
/// Example usage:
/// \code
/// assert_quantized_ztensor_values(&ztensor, false, values);
/// \endcode
///
/// \param[in] ztensor pointer to zdnn_ztensor with actual values
/// \param[in] repeat_first_expected_value if true, all ztensor values will be
/// compared to values[0]
/// \param[in] expected_vals array of expected values
///
/// \return None (assert fails if any actual value not within expected range)
///
void assert_quantized_ztensor_compare_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals) {
zdnn_status status;
zdnn_tensor_desc *pre_tfrmd_desc = ztensor->pre_transformed_desc;
uint64_t num_elements = 0;
switch (ztensor->transformed_desc->layout) {
case ZDNN_1D:
case ZDNN_2D:
case ZDNN_2DS:
case ZDNN_3D:
case ZDNN_3DS:
case ZDNN_4D:
case ZDNN_4DS:
case ZDNN_NHWC:
num_elements = get_num_elements(ztensor, ELEMENTS_PRE);
break;
case ZDNN_FICO:
case ZDNN_ZRH:
TEST_FAIL_MESSAGE_FORMATTED(
"does not support %s layout as we don't support unstickifying "
"concatenated ztensors.",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"layout. Could you teach me?",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
}
// Malloc error_message as it will be large if num_elements is large.
uint64_t big_error_message_size =
(uint64_t)sizeof(char) * ERROR_MESSAGE_STR_LENGTH * num_elements;
char *error_msg = malloc(big_error_message_size);
float *actual_vals;
// Get unstickified data from ztensor to actual_vals[]
actual_vals = malloc(num_elements * get_data_type_size(pre_tfrmd_desc->type));
status = zdnn_transform_origtensor(ztensor, actual_vals);
snprintf(error_msg, big_error_message_size,
"zdnn_transform_origtensor failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(status == ZDNN_OK, error_msg);
  // Count how many of the ztensor's values (converted back to floats) fail to
  // match the expected values
uint64_t num_mismatch = 0;
// Loop appends to error_msg so reset it first
error_msg[0] = '\0';
char *error_fmt = "Element %" PRIu64 " == %f expecting %f";
char *error_fmt2 = " <==== FAILED (diff beyond 0.0)";
  // Compare the actual and expected values
for (uint64_t i = 0; i < num_elements; i++) {
bool is_equal = false;
switch (pre_tfrmd_desc->type) {
case FP32: {
float actual = actual_vals[i];
float expected = expected_vals[i];
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i, actual,
expected);
LOG_DEBUG(error_fmt, i, actual, expected);
is_equal = actual == expected;
break;
}
default:
// NOTE: Along with undefined types, DLFLOAT types will also come down
// this path. DLFLOATS are a stickified types which are not valid types
// for the pre_tfrmd_desc (ie prestickifed description).
snprintf(error_msg, big_error_message_size, "unsupported type: %d\n",
pre_tfrmd_desc->type);
TEST_FAIL_MESSAGE(error_msg);
break;
}
if (!is_equal) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt2);
num_mismatch++;
}
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
}
bool enough_pass = (float)num_mismatch / (float)num_elements < 0.01f;
// Assert that all passed and clean up temp data
TEST_ASSERT_MESSAGE(enough_pass, error_msg);
free(actual_vals);
free(error_msg);
}
zDNN-1.1.2/tests/common_quantization.h 0000664 0000000 0000000 00000005470 15000221702 0017730 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "zdnn.h"
#include "zdnn_private.h"
/**
* Helper macro that given the indices and sizes of a multidimensional array
* returns equivalent index to a flat representation of the same array. The
* result is cast to uint64_t as that's the largest number of total elements a
* ztensor supports as opposed to the single dimension maximum of unint32_t
*
* Note: Default usage is for 3D arrays. For 2D arrays, use 0 for the
* undefined dimension's index and 1 its size.
*/
#define GET_FLAT_IDX(stack, row, col, row_size, col_size) \
(uint64_t)(stack) * (row_size) * (col_size) + (row) * (col_size) + (col)
/**
* Helper macro that given a real value, a scale, and an offset, will produce
* a quantized value clipped between the limits for a signed eight-bit integer.
*/
#define QUANTIZE(r, scale, offset) \
(MIN(MAX(roundf(r / scale + offset), -128.f), 127.f))
/**
* Helper macro that given a quantized value, a scale, and an offset, will
* produce a real value clipped.
*/
#define DEQUANTIZE(q, scale, offset) ((q - offset) * scale)
/**
* Helper macro that qunatizes and then dequantizes a real value using a scale
* and an offset.
*/
#define CLEANSE_QUANTIZED(r, scale, offset) \
(DEQUANTIZE(QUANTIZE(r, scale, offset), scale, offset))
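/**
 * Minimal worked example of the macros above (illustrative values only):
 *   QUANTIZE(6.5f, 3.f, 2.f)  -> roundf(6.5 / 3 + 2) = 4.f  (within [-128, 127])
 *   DEQUANTIZE(4.f, 3.f, 2.f) -> (4 - 2) * 3         = 6.f
 * so CLEANSE_QUANTIZED(6.5f, 3.f, 2.f) yields 6.f, i.e. the value 6.5 after one
 * quantize/dequantize round trip.
 */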
zdnn_ztensor *alloc_quantized_ztensor_with_values(
uint32_t *shape, zdnn_data_layouts pre_tfrmd_layout, zdnn_data_types type,
zdnn_quantized_transform_types transform_type, const float *values_data,
const float scale, const float offset);
void assert_quantized_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals);
void assert_dequantized_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals);
void assert_quantized_ztensor_compare_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value,
const float *expected_vals);
zDNN-1.1.2/tests/common_rnn.c 0000664 0000000 0000000 00000027515 15000221702 0015776 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_rnn.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/// Returns the size in bytes required for an RNN work_area buffer.
///
/// \param[in] function_code NNPA function code (ie NNPA_LSTMACT or NNPA_GRUACT)
/// \param[in] batch_size batch size for the RNN
/// \param[in] num_timesteps number of timesteps in the RNN
/// \param[in] hidden_state_size number of hidden states in the RNN
/// \param[in] direction RNN layer direction (ie FWD, BWD, BIDIR)
///
/// \return number of bytes required for work_area based on RNN values or
/// throws a test failure.
///
size_t calc_rnn_work_area_size(uint8_t function_code, uint32_t batch_size,
uint32_t num_timesteps,
uint32_t hidden_state_size,
lstm_gru_direction direction) {
if (function_code != NNPA_LSTMACT && function_code != NNPA_GRUACT) {
TEST_FAIL_MESSAGE_FORMATTED("NNPA function code %d is not supported.",
function_code);
}
uint32_t padded_hidden_state_size = CEIL(hidden_state_size, 64) * 64 * 4;
uint32_t num_gates = get_func_code_num_gates(function_code);
// Initialize descs for work area
zdnn_tensor_desc fused_desc, bias_add_desc, c_desc;
init_transformed_desc(ZDNN_4D, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&fused_desc, num_timesteps, 1, batch_size,
padded_hidden_state_size);
init_transformed_desc(ZDNN_4D, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&bias_add_desc, num_gates, 1, batch_size,
hidden_state_size);
init_transformed_desc(ZDNN_4D, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, &c_desc,
2, 1, batch_size, hidden_state_size);
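  // The work area holds one fused buffer covering all timesteps plus one
  // bias-add buffer; LSTM additionally needs a cell-state buffer, and
  // bidirectional layers need one copy of everything per direction.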
size_t work_area_size =
zdnn_getsize_ztensor(&fused_desc) + zdnn_getsize_ztensor(&bias_add_desc);
if (function_code == NNPA_LSTMACT) {
work_area_size += zdnn_getsize_ztensor(&c_desc);
}
if (direction == BIDIR) {
work_area_size *= 2;
}
return work_area_size;
}
/// Allocates a 4k aligned work area buffer based on the given size and returns
/// a pointer to the memory.
///
/// \param[in] work_area_size size in bytes required for the work area
///
/// \return pointer to the work area buffer or throws test failure
///
void *alloc_rnn_work_area(size_t work_area_size) {
void *work_area = NULL;
if (!(work_area = malloc_aligned_4k(work_area_size))) {
TEST_FAIL_MESSAGE_FORMATTED("malloc_aligned_4k (%zu) failed",
work_area_size);
}
memset(work_area, 0, work_area_size);
return work_area;
}
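// Illustrative sketch of how the two helpers above are typically combined in a
// test (variable names here are assumptions, not part of the library):
//
//   size_t work_area_size = calc_rnn_work_area_size(
//       NNPA_LSTMACT, batch_size, num_timesteps, hidden_state_size, FWD);
//   void *work_area = alloc_rnn_work_area(work_area_size);
//   ... pass work_area to the zdnn_lstm() call under test ...
//   free_aligned_4k(work_area); // assumed counterpart of malloc_aligned_4k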
/// Calls the public API and checks the returned status matches the expected status. If OK
/// status expected, confirm actual output values match expected values.
///
/// \param[in] function_code NNPA function code (ie NNPA_LSTMACT or NNPA_GRUACT). For LSTM
/// weights and biases will use all four gates values (FICO order)
/// and c0 and cf inputs. For GRU weights and biases use the first
/// three gate values (ZRH order). GRU ignores all g3 values and all
/// c0 and cf related inputs.
/// \param[in] ... shapes, layouts, and values to create required tensors.
/// \param[in] direction RNN layer direction (ie FWD, BWD, BIDIR)
/// \param[in] exp_status Expected status for the public API call
///
/// \return nothing but throws test failure if values don't match
/// expected or an unexpected failure prevents the test from completing.
///
void test_zdnn_api_lstm_gru(
uint8_t function_code,
uint32_t *input_shape, zdnn_data_layouts input_layout, float *input_values,
uint32_t *h0_shape, zdnn_data_layouts h0_layout, float *h0_values,
uint32_t *c0_shape, zdnn_data_layouts c0_layout, float *c0_values,
uint32_t *input_weights_shape, zdnn_data_layouts input_weights_layout,
float *input_weights_g0_values, float *input_weights_g1_values,
float *input_weights_g2_values, float *input_weights_g3_values,
uint32_t *input_biases_shape, zdnn_data_layouts input_biases_layout,
float *input_biases_g0_values, float *input_biases_g1_values,
float *input_biases_g2_values, float *input_biases_g3_values,
uint32_t *hidden_weights_shape, zdnn_data_layouts hidden_weights_layout,
float *hidden_weights_g0_values, float *hidden_weights_g1_values,
float *hidden_weights_g2_values, float *hidden_weights_g3_values,
uint32_t *hidden_biases_shape, zdnn_data_layouts hidden_biases_layout,
float *hidden_biases_g0_values, float *hidden_biases_g1_values,
float *hidden_biases_g2_values, float *hidden_biases_g3_values,
uint32_t *hn_out_shape, zdnn_data_layouts hn_out_layout,
float *exp_hn_out_values,
uint32_t *cf_out_shape, zdnn_data_layouts cf_out_layout,
float *exp_cf_out_values,
lstm_gru_direction direction, zdnn_status exp_status) {
char api_method[AIU_METHOD_STR_LENGTH] = "zdnn_";
if (function_code != NNPA_LSTMACT && function_code != NNPA_GRUACT) {
TEST_FAIL_MESSAGE_FORMATTED("NNPA function code %d is not supported.",
function_code);
}
// Run test for each pretransformed data type
zdnn_ztensor *input, *h0, *c0, *weights, *biases, *hidden_weights,
*hidden_biases;
input = alloc_ztensor_with_values(input_shape, input_layout, test_datatype,
NO_CONCAT, false, input_values);
h0 = alloc_ztensor_with_values(h0_shape, h0_layout, test_datatype, NO_CONCAT,
false, h0_values);
if (function_code == NNPA_LSTMACT) {
// Pass all four gate buffers (FICO) to alloc_ztensor
weights = alloc_ztensor_with_values(
input_weights_shape, input_weights_layout, test_datatype,
RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS, false,
input_weights_g0_values, input_weights_g1_values,
input_weights_g2_values, input_weights_g3_values);
biases = alloc_ztensor_with_values(
input_biases_shape, input_biases_layout, test_datatype,
RNN_TYPE_LSTM | USAGE_BIASES, false, input_biases_g0_values,
input_biases_g1_values, input_biases_g2_values, input_biases_g3_values);
hidden_weights = alloc_ztensor_with_values(
hidden_weights_shape, hidden_weights_layout, test_datatype,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS, false, hidden_weights_g0_values,
hidden_weights_g1_values, hidden_weights_g2_values,
hidden_weights_g3_values);
hidden_biases = alloc_ztensor_with_values(
hidden_biases_shape, hidden_biases_layout, test_datatype,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES, false, hidden_biases_g0_values,
hidden_biases_g1_values, hidden_biases_g2_values,
hidden_biases_g3_values);
// Alloc c0 ztensor
c0 = alloc_ztensor_with_values(c0_shape, c0_layout, test_datatype,
NO_CONCAT, false, c0_values);
} else {
// Pass three gate buffers (ZRH) to alloc_ztensor, the fourth isn't used
// in GRU.
weights = alloc_ztensor_with_values(
input_weights_shape, input_weights_layout, test_datatype,
RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_WEIGHTS, false,
input_weights_g0_values, input_weights_g1_values,
input_weights_g2_values);
biases = alloc_ztensor_with_values(
input_biases_shape, input_biases_layout, test_datatype,
RNN_TYPE_GRU | USAGE_BIASES, false, input_biases_g0_values,
input_biases_g1_values, input_biases_g2_values);
hidden_weights = alloc_ztensor_with_values(
hidden_weights_shape, hidden_weights_layout, test_datatype,
RNN_TYPE_GRU | USAGE_HIDDEN_WEIGHTS, false, hidden_weights_g0_values,
hidden_weights_g1_values, hidden_weights_g2_values);
hidden_biases = alloc_ztensor_with_values(
hidden_biases_shape, hidden_biases_layout, test_datatype,
RNN_TYPE_GRU | USAGE_HIDDEN_BIASES, false, hidden_biases_g0_values,
hidden_biases_g1_values, hidden_biases_g2_values);
c0 = NULL; // just so the compiler won't complain about uninitialized c0
}
// Get some basic shape info from the shapes of the various inputs
uint32_t batch_size = input->transformed_desc->dim2;
uint32_t num_timesteps = input->transformed_desc->dim4;
uint32_t hidden_state_size = h0->transformed_desc->dim1;
// Run API once with NULL work_area and again with work_area set.
for (int work_area_pass = 0; work_area_pass < 2; work_area_pass++) {
zdnn_ztensor *hn_out, *cf_out;
hn_out =
alloc_ztensor_with_values(hn_out_shape, hn_out_layout, test_datatype,
NO_CONCAT, true, ZERO_ARRAY);
size_t work_area_size = 0;
void *work_area = NULL;
void *zeroed_work_area = NULL;
// Set work_area during second pass
if (work_area_pass == 1) {
work_area_size =
calc_rnn_work_area_size(function_code, batch_size, num_timesteps,
hidden_state_size, direction);
work_area = alloc_rnn_work_area(work_area_size);
zeroed_work_area = alloc_rnn_work_area(work_area_size);
memset(zeroed_work_area, 0, work_area_size);
}
zdnn_status status = GENERAL_TESTCASE_FAILURE;
// Call to correct API based on layer type
if (function_code == NNPA_LSTMACT) {
cf_out =
alloc_ztensor_with_values(cf_out_shape, cf_out_layout, test_datatype,
NO_CONCAT, true, ZERO_ARRAY);
// Make API call and confirm status matches expected
strcpy(api_method, "zdnn_lstm");
status = zdnn_lstm(input, h0, c0, weights, biases, hidden_weights,
hidden_biases, direction, work_area, hn_out, cf_out);
} else if (function_code == NNPA_GRUACT) {
strcpy(api_method, "zdnn_gru");
status = zdnn_gru(input, h0, weights, biases, hidden_weights,
hidden_biases, direction, work_area, hn_out);
}
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"work_area_pass %d call to %s() returned "
"status %08x \"%s\" but expected %08x \"%s\"",
work_area_pass, api_method, status,
zdnn_get_status_message(status), exp_status,
zdnn_get_status_message(exp_status));
// Check that work_area was written to on second pass
if (work_area_pass == 1) {
if (exp_status == ZDNN_OK &&
!memcmp(work_area, zeroed_work_area, work_area_size)) {
TEST_FAIL_MESSAGE_FORMATTED(
"%s() - expected work_area have been written to but it "
"contains all zeros",
__func__);
}
free_aligned_4k(work_area);
free_aligned_4k(zeroed_work_area);
}
// Confirm per timestep output tensor values match expected values
if (exp_status == ZDNN_OK) {
assert_ztensor_values(hn_out, false, exp_hn_out_values);
}
free_ztensor_buffers(1, hn_out);
// (LSTM only) Confirm final cell state tensor values match expected
if (function_code == NNPA_LSTMACT) {
if (exp_status == ZDNN_OK) {
assert_ztensor_values(cf_out, false, exp_cf_out_values);
}
free_ztensor_buffers(1, cf_out);
}
} // end of work_area_pass loop
// Free input tensors
free_ztensor_buffers(6, input, h0, weights, biases, hidden_weights,
hidden_biases);
if (function_code == NNPA_LSTMACT) {
free_ztensor_buffers(1, c0);
}
}
zDNN-1.1.2/tests/common_rnn.h 0000664 0000000 0000000 00000004452 15000221702 0015776 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_COMMON_RNN_H_
#define TESTS_COMMON_RNN_H_
#include "testsupport.h"
void test_zdnn_api_lstm_gru(
uint8_t function_code,
uint32_t *input_shape, zdnn_data_layouts input_layout, float *input_values,
uint32_t *h0_shape, zdnn_data_layouts h0_layout, float *h0_values,
uint32_t *c0_shape, zdnn_data_layouts c0_layout, float *c0_values,
uint32_t *input_weights_shape, zdnn_data_layouts input_weights_layout,
float *input_weights_g0_values, float *input_weights_g1_values,
float *input_weights_g2_values, float *input_weights_g3_values,
uint32_t *input_biases_shape, zdnn_data_layouts input_biases_layout,
float *input_biases_g0_values, float *input_biases_g1_values,
float *input_biases_g2_values, float *input_biases_g3_values,
uint32_t *hidden_weights_shape, zdnn_data_layouts hidden_weights_layout,
float *hidden_weights_g0_values, float *hidden_weights_g1_values,
float *hidden_weights_g2_values, float *hidden_weights_g3_values,
uint32_t *hidden_biases_shape, zdnn_data_layouts hidden_biases_layout,
float *hidden_biases_g0_values, float *hidden_biases_g1_values,
float *hidden_biases_g2_values, float *hidden_biases_g3_values,
uint32_t *hn_out_shape, zdnn_data_layouts hn_out_layout,
float *exp_hn_out_values,
uint32_t *cf_out_shape, zdnn_data_layouts cf_out_layout,
float *exp_cf_out_values,
lstm_gru_direction direction, zdnn_status exp_status);
// this macro assumes the values of NNPA_LSTMACT and NNPA_GRUACT are next to
// each other
#define LOOP_LSTM_AND_GRU(lg) \
for (int lg = NNPA_LSTMACT; lg <= NNPA_GRUACT; lg++)
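// Illustrative usage sketch (assumed caller pattern, not taken from the test
// drivers); run_one_rnn_case() is a hypothetical helper standing in for a
// real test body:
//
//   LOOP_LSTM_AND_GRU(act) {
//     // act takes the value NNPA_LSTMACT and then NNPA_GRUACT
//     run_one_rnn_case(act);
//   }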
#endif /* TESTS_COMMON_RNN_H_ */
zDNN-1.1.2/tests/resources/ 0000775 0000000 0000000 00000000000 15000221702 0015465 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/tests/resources/testresult_parser.py 0000664 0000000 0000000 00000006465 15000221702 0021644 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021, 2024
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import glob
import os
import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
"--dir", type=str, default="bin/", help="directory with test results"
)
args = parser.parse_args()
results_dir = args.dir
if not results_dir.endswith("/"):
results_dir = f"{results_dir}/"
results_path = f"{results_dir}testDriver*.txt"
num_passes = 0
num_ignores = 0
num_fails = 0
# accumulative pass/ignore/fail messages
pass_txt = ""
ignore_txt = ""
fail_txt = ""
# other things we care about (crashes, etc.)
notes_txt = ""
# escaped newline for make
NL = "\\n"
for filename in glob.glob(results_path):
if os.stat(filename).st_size == 0:
notes_txt = notes_txt + filename + " is a 0 byte file. Likely crashed." + NL
else:
for line in open(filename, "r"):
line = line.strip()
if ":PASS" in line or ":IGNORE" in line or ":FAIL" in line:
test_file, line_num, test_name, status = line.split(":", 3)
test_file = test_file.strip()
line_num = line_num.strip()
test_name = test_name.strip()
status = status.strip()
if "PASS" in status:
num_passes = num_passes + 1
pass_txt = pass_txt + test_file + ":" + test_name + NL
if "IGNORE" in status:
num_ignores = num_ignores + 1
ignore_txt = (
ignore_txt + test_file + ":" + test_name + ":" + status + NL
)
if "FAIL" in status:
num_fails = num_fails + 1
fail_txt = (
fail_txt + test_file + ":" + test_name + ":" + status + NL
)
# Unity prints a "final status" text at the end. If the last line isn't either
# of these then likely the testDriver crashed
if line != "FAIL" and line != "OK":
notes_txt = notes_txt + filename + " did not finish. Likely crashed." + NL
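# Illustrative sketch of the Unity result line this parser expects (example
# only, not from an actual run):
#   testDriver_get.c:42:test_max_limit_dlf16_fp32:PASS
# i.e. "<test_file>:<line_num>:<test_name>:<STATUS>", which is what
# line.split(":", 3) above unpacks.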
# print the whole report as one big string so that make won't randomly insert
# a space in between every print()
print(
f"-----------------------{NL}PASSES{NL}-----------------------{NL}"
+ pass_txt
+ f"{NL}-----------------------{NL}IGNORES{NL}-----------------------{NL}"
+ ignore_txt
+ f"{NL}-----------------------{NL}FAILURES{NL}-----------------------{NL}"
+ fail_txt
+ f"{NL}------------------------------------------------------------{NL}"
+ f"total = {num_passes + num_ignores + num_fails}, num_passes = {num_passes},"
+ f" num_ignores = {num_ignores}, num_fails = {num_fails}{NL}"
+ f"{NL}------------------------------------------------------------{NL}"
+ notes_txt
)
zDNN-1.1.2/tests/testDriver_available_apis.c 0000664 0000000 0000000 00000024736 15000221702 0021002 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "testsupport.h"
// cppcheck-suppress unusedFunction
void setUp(void) { VERIFY_HW_ENV; }
// cppcheck-suppress unusedFunction
void tearDown(void) {}
void test_nnpa_add() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_ADD),
"Expected is_operation_available() to return true.");
}
void test_nnpa_div() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_DIV),
"Expected is_operation_available() to return true.");
}
void test_nnpa_min() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MIN),
"Expected is_operation_available() to return true.");
}
void test_nnpa_max() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MAX),
"Expected is_operation_available() to return true.");
}
void test_nnpa_log() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_LOG),
"Expected is_operation_available() to return true.");
}
void test_nnpa_sig() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_SIGMOID),
"Expected is_operation_available() to return true.");
}
void test_nnpa_exp() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_EXP),
"Expected is_operation_available() to return true.");
}
void test_nnpa_tahn() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_TANH),
"Expected is_operation_available() to return true.");
}
void test_nnpa_batchnorm() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_BATCHNORM),
"Expected is_operation_available() to return true.");
}
void test_nnpa_meanreduce2d() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MEANREDUCE2D),
"Expected is_operation_available() to return true.");
}
void test_nnpa_avgpool2d() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_AVGPOOL2D),
"Expected is_operation_available() to return true.");
}
void test_nnpa_maxpool2d() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MAXPOOL2D),
"Expected is_operation_available() to return true.");
}
void test_nnpa_sub() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_SUB),
"Expected is_operation_available() to return true.");
}
void test_nnpa_mul() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MUL),
"Expected is_operation_available() to return true.");
}
void test_nnpa_gru() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_GRU),
"Expected is_operation_available() to return true.");
}
void test_nnpa_gelu() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_GELU);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_relu() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_RELU),
"Expected is_operation_available() to return true.");
}
void test_nnpa_sqrt() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_SQRT);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_invsqrt() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_INVSQRT);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_norm() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_NORM);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_moments() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_MOMENTS);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_layernorm() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_LAYERNORM);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_reduce() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_REDUCE);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_conv2d() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_CONV2D);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_softmax() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_SOFTMAX),
"Expected is_operation_available() to return true.");
}
void test_nnpa_softmax_mask() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_SOFTMAX_MASK);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_nnpa_matmul_op() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_MATMUL_OP),
"Expected is_operation_available() to return true.");
}
void test_nnpa_lstm() {
TEST_ASSERT_MESSAGE(true == is_operation_available(ZDNN_LSTM),
"Expected is_operation_available() to return true.");
}
void test_nnpa_leaky_relu() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_LEAKY_RELU);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_transform_with_saturation() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_TRANSFORM_ZTENSOR_WITH_SATURATION);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
void test_transform_quant_ztensor() {
bool expected_status = !isTelumI();
bool status = is_operation_available(ZDNN_TRANSFORM_QUANTIZED_ZTENSOR);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"is_operation_available() status is %s but expects %s",
status ? "true" : "false", expected_status ? "true" : "false");
}
// void test_matmul_bcast_op() {
// bool expected_status = !isTelumI();
// bool status = is_operation_available(ZDNN_MATMUL_BCAST_OP);
// TEST_ASSERT_MESSAGE_FORMATTED(
// status == expected_status,
// "is_operation_available() status is %s but expects %s",
// status ? "true" : "false", expected_status ? "true" : "false");
// }
// void test_matmul_transpose_op() {
// bool expected_status = !isTelumI();
// bool status = is_operation_available(ZDNN_MATMUL_TRANSPOSE_OP);
// TEST_ASSERT_MESSAGE_FORMATTED(
// status == expected_status,
// "is_operation_available() status is %s but expects %s",
// status ? "true" : "false", expected_status ? "true" : "false");
// }
// void test_quant_matmul_op() {
// bool expected_status = !isTelumI();
// bool status = is_operation_available(ZDNN_QUANTIZED_MATMUL_OP);
// TEST_ASSERT_MESSAGE_FORMATTED(
// status == expected_status,
// "is_operation_available() status is %s but expects %s",
// status ? "true" : "false", expected_status ? "true" : "false");
// }
// void test_quant_matmul_pre_computed_op() {
// bool expected_status = !isTelumI();
// bool status =
// is_operation_available(ZDNN_QUANTIZED_MATMUL_PRE_COMPUTED_OP);
// TEST_ASSERT_MESSAGE_FORMATTED(
// status == expected_status,
// "is_operation_available() status is %s but expects %s",
// status ? "true" : "false", expected_status ? "true" : "false");
// }
int main() {
UNITY_BEGIN();
RUN_TEST(test_nnpa_add);
RUN_TEST(test_nnpa_div);
RUN_TEST(test_nnpa_min);
RUN_TEST(test_nnpa_max);
RUN_TEST(test_nnpa_log);
RUN_TEST(test_nnpa_sig);
RUN_TEST(test_nnpa_exp);
RUN_TEST(test_nnpa_tahn);
RUN_TEST(test_nnpa_batchnorm);
RUN_TEST(test_nnpa_avgpool2d);
RUN_TEST(test_nnpa_meanreduce2d);
RUN_TEST(test_nnpa_maxpool2d);
RUN_TEST(test_nnpa_moments);
RUN_TEST(test_nnpa_layernorm);
RUN_TEST(test_nnpa_reduce);
RUN_TEST(test_nnpa_sub);
RUN_TEST(test_nnpa_mul);
RUN_TEST(test_nnpa_gru);
RUN_TEST(test_nnpa_gelu);
RUN_TEST(test_nnpa_relu);
RUN_TEST(test_nnpa_sqrt);
RUN_TEST(test_nnpa_invsqrt);
RUN_TEST(test_nnpa_norm);
RUN_TEST(test_nnpa_batchnorm);
RUN_TEST(test_nnpa_avgpool2d);
RUN_TEST(test_nnpa_meanreduce2d);
RUN_TEST(test_nnpa_maxpool2d);
RUN_TEST(test_nnpa_moments);
RUN_TEST(test_nnpa_layernorm);
RUN_TEST(test_nnpa_reduce);
RUN_TEST(test_nnpa_conv2d);
RUN_TEST(test_nnpa_softmax);
RUN_TEST(test_nnpa_softmax_mask);
RUN_TEST(test_nnpa_matmul_op);
RUN_TEST(test_nnpa_lstm);
RUN_TEST(test_nnpa_leaky_relu);
RUN_TEST(test_transform_with_saturation);
RUN_TEST(test_transform_quant_ztensor);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_convert.c 0000664 0000000 0000000 00000034730 15000221702 0017521 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This test driver tests the data type conversion code upon which
* the Stickify/Unstickify paths are dependent for conversion AND
* proper value placement.
*
* Each test creates a set of random float values (FP32, FP16 or BFLOAT)
* and calls a common routine to build its own version of the converted
* values, invoke the library's convert_data_format, then compare the two
* areas for expected values and placement. It then does the opposite:
 * invokes the library's convert_data_format to convert back to the
* original format, and compares the input area to the converted/unconverted
* area for proper placement.
*
 * Note that the 'no stride' Stickify/unstickify processing will not handle
 * sets of values numbering larger than 64, so values up to 64 are tested here.
*
* Also note that the stride versions will likely have different validation
* because it *doesn't* have the aforementioned '64 entry' limitation.
*/
#include "convert.h"
#include "testsupport.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*
* tests:
* test FP32->DLFloat, using 1,4,7,8,9,15,63,64 (no stride)
* test FP16->DLFloat, using 1,7,8,9,63,64 (no stride)
* test BFLOAT->DLFloat, using 1,7,8,9,63,64 (no stride)
*
* test DLFloat->FP16, using 1,7,8,9,63,64 (no stride)
* test DLFloat->FP32, using 1,4,7,8,9,15,63,64 (no stride)
* test DLFloat->BFloat, using 1,7,8,9,63,64 (no stride)
*/
/* define some packed structs for holding data */
// midfloat_str used by FP16 testing, easily grabs
// middle two bytes of a FP32 and treats it as a
// 2 byte float.
typedef
#ifdef __MVS__
_Packed
#endif
struct midfloat_str {
uint8_t filler1;
float_bit16 shortfloat;
uint8_t filler2;
}
#ifndef __MVS__
__attribute__((packed))
#endif
midfloat_str;
// skip performing saturation for all convert tests using this function
void (*saturate_func)(const vec_float32 *, const vec_float32 *, vec_float32 *,
vec_float32 *) = &skip_saturate_fp32_to_dlf16;
// expected_data_str structure with a union to
// allow us to convert individual values then compare as
// one data area
typedef
#ifdef __MVS__
_Packed
#endif
struct expected_data_str {
union maps {
float_bit16 shortfloat[64];
float expfloat[64];
// cppcheck-suppress unusedStructMember
char exp_data_reserved[1024];
} maps;
}
#ifndef __MVS__
__attribute__((packed))
#endif
expected_data_str;
/*
vec_char8 selection_vector = {0, 1, 4, 5, 8, 9, 12, 13,
16, 17, 20, 21, 24, 25, 28, 29};*/
/* convert_and_compare
Accepts an array of up to 64 values, converts the values to DL16
itself, calls convert_data_format to do its thing,
and compares the two areas. Then converts back and compares that
to the original. Return values are multiplied by constants to separate
different types of errors. */
int convert_and_compare(zdnn_data_types in_type, int numvalues,
void *fixeddata) {
// Define areas for stickify conversion to return results
char converted_DLF_data[1024];
char converted_orig_data[1024];
memset((void *)(converted_DLF_data), 0, 1024);
memset((void *)(converted_orig_data), 0, 1024);
// Define an expected data area for comparing our version of converted
// values (and placement) to The Library's.
expected_data_str expected_DLF_data;
memset((void *)(&expected_DLF_data), 0, 1024);
// Define a lossy data area for comparing the original data (with expected
// precision loss) and The Library's converted-back-to-original data.
expected_data_str expected_orig_data;
memset((void *)(&expected_orig_data), 0, 1024);
float_bit16 *fixed_float_bit16 = (float_bit16 *)fixeddata;
float *fixedfloat = (float *)fixeddata;
// Build the "expected" areas that we will compare to conversion results
for (int i = 0; i < numvalues; i = i + 1) {
if (in_type == FP32) {
expected_DLF_data.maps.shortfloat[i] =
cnvt_1_fp32_to_dlf16(fixedfloat[i]); /* Convert a value, store in
expected dlfloat entry */
LOG_DEBUG("++ c_1_fp32_to_dlf for expected DLF %d of %d", i, numvalues);
LOG_DEBUG("First : %x, Second: %x", fixedfloat[i],
expected_DLF_data.maps.shortfloat[i]);
expected_orig_data.maps.expfloat[i] = cnvt_1_dlf16_to_fp32(
expected_DLF_data.maps.shortfloat[i]); /* Convert a value back to
original format, store in
expected original format
entry */
LOG_DEBUG("++ c_1_dlf16_to_FP32 for expected Orig %d of %d", i,
numvalues);
LOG_DEBUG("First : %x, Second: %x", fixedfloat[i],
expected_orig_data.maps.shortfloat[i]);
}
if (in_type == FP16) {
expected_DLF_data.maps.shortfloat[i] =
cnvt_1_fp16_to_dlf16(fixed_float_bit16[i]); /* Convert a value, store
in expected dlfloat entry */
expected_orig_data.maps.shortfloat[i] = cnvt_1_dlf16_to_fp16(
expected_DLF_data.maps.shortfloat[i]); /* Convert a value back to
original format, store in
expected original format
entry */
}
if (in_type == BFLOAT) {
expected_DLF_data.maps.shortfloat[i] =
cnvt_1_bfloat_to_dlf16(fixed_float_bit16[i]); /* Convert a value,
store in expected dlfloat entry */
expected_orig_data.maps.shortfloat[i] =
cnvt_1_dlf16_to_bfloat(expected_DLF_data.maps.shortfloat[i]); /*
Convert a value back to original
format, store in expected
original format entry */
}
}
// call convert_data to convert/stickify the original data
LOG_DEBUG("Calling convert_data_format", NO_ARG);
int converted_cnt =
convert_data_format(fixeddata, in_type, converted_DLF_data,
ZDNN_DLFLOAT16, numvalues, saturate_func);
if (converted_cnt != numvalues) {
LOG_DEBUG("convert_data (to DLF) did not return proper result (%d != %d)",
converted_cnt, numvalues);
TEST_FAIL_MESSAGE("convert_data (to DLF) count did not match actual");
}
// compare expected to convert_data_format output
LOG_DEBUG("comparing expected to convert_data output", NO_ARG);
LOG_DEBUG("expected data - first word / last word %d / %d",
*(int *)((char *)&expected_DLF_data),
*(int *)((char *)&expected_DLF_data) +
(numvalues * get_data_type_size(ZDNN_DLFLOAT16)) - 4);
LOG_DEBUG("expected data address %" PRIXPTR "",
(uint64_t)((char *)&expected_DLF_data));
LOG_DEBUG("converted data - first word / last word %d / %d",
*(int *)((char *)converted_DLF_data),
*(int *)((char *)converted_DLF_data) +
(numvalues * get_data_type_size(ZDNN_DLFLOAT16)) - 4);
LOG_DEBUG("converted data address %" PRIXPTR "",
(uint64_t)((char *)converted_DLF_data));
TEST_ASSERT_MESSAGE(sizeof(expected_DLF_data) == sizeof(converted_DLF_data),
"expected data sizes different (test u/t error)");
int compare_data_size = sizeof(expected_DLF_data);
/* validate converted area has something in it */
char zeroes[256];
memset(zeroes, 0, sizeof(zeroes));
TEST_ASSERT_MESSAGE(
memcmp(converted_DLF_data, zeroes, (numvalues * sizeof(short))) != 0,
"converted-to-dlf area left as zeros");
/* Compare expected DLFLOAT to converted DLFLOAT, and validate */
int memcmp_rc =
memcmp(&expected_DLF_data, converted_DLF_data, (size_t)compare_data_size);
if (memcmp_rc != 0) {
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("memcmp (post convert to DLF) did not return proper result (%d)",
memcmp_rc);
printf("expected DLFloat data\n");
print_hex((numvalues * sizeof(float)), &expected_DLF_data);
printf("Converted DLFloat data\n");
print_hex((numvalues * sizeof(float)), converted_DLF_data);
}
TEST_FAIL_MESSAGE(
"memcmp (post convert to DLF, no stride) did not match expected");
}
// call convert_data in stride to convert/stickify the original data
LOG_DEBUG("call convert_data_in_stride", NO_ARG);
converted_cnt = convert_data_format_in_stride(
fixeddata, in_type, converted_DLF_data, ZDNN_DLFLOAT16, numvalues, 1);
if (converted_cnt != numvalues) {
LOG_DEBUG("Converted (in_stride) count doesn't match actual, %d / %d",
converted_cnt, numvalues);
TEST_FAIL_MESSAGE(
"Convert_data (to DLF) in stride did not return proper result");
}
/* Compare expected DLFLOAT to converted DLFLOAT, and validate */
memcmp_rc =
memcmp(&expected_DLF_data, converted_DLF_data, (size_t)compare_data_size);
if (memcmp_rc != 0) {
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("Expected data doesn't match converted, %d", memcmp_rc);
printf("expected DLFloat data\n");
print_hex((numvalues * sizeof(float)), &expected_DLF_data);
printf("Converted DLFloat data\n");
print_hex((numvalues * sizeof(float)), converted_DLF_data);
}
TEST_FAIL_MESSAGE("Converted DLF data (instride) did not match");
}
// Now convert back the other way, and compare to original
LOG_DEBUG(
"comparing data converted back to Orig format by convert_data output",
NO_ARG);
int orig_data_size = numvalues * get_data_type_size(in_type);
LOG_DEBUG("call convert_data", NO_ARG);
int converted_cnt2 = convert_data_format(converted_DLF_data, ZDNN_DLFLOAT16,
converted_orig_data, in_type,
numvalues, saturate_func);
if (converted_cnt2 != numvalues) {
LOG_DEBUG("converted count (to_orig) did not match actual (%d != %d)",
converted_cnt2, numvalues);
TEST_FAIL_MESSAGE(
"convert_data (to orig, no stride) count did not match actual");
}
TEST_ASSERT_MESSAGE(
memcmp(converted_orig_data, zeroes, (numvalues * sizeof(short))) != 0,
"converted-to-original area left as zeros");
int memcmp_rc2 =
memcmp(&expected_orig_data, converted_orig_data, (size_t)orig_data_size);
if (memcmp_rc2 != 0) {
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("memcmp (after convert back to original) did not return "
"proper result (%d)",
memcmp_rc2);
printf("expected orig vs converted orig data\n");
print_hex(orig_data_size, &expected_orig_data);
print_hex(orig_data_size, converted_orig_data);
}
TEST_FAIL_MESSAGE("convert_data (back to orig) did not match initial");
}
return ZDNN_STATUS_OK;
}
// generate a float value between SMALLEST_RANDOM_FP to max
#define GEN_RAND_FLOAT(x, max) \
while ((x) < SMALLEST_RANDOM_FP) { \
(x) = (float)rand() / (float)(RAND_MAX / max); \
}
/*********************/
/* FP32 to DLF tests */
/*********************/
void test_FP32_DLF(int count) {
float fixeddata[128] = {0};
// Build a tensor data area of req'd type with random data
for (int i = 0; i < count; i++) {
GEN_RAND_FLOAT(fixeddata[i], 3);
}
int test_result = convert_and_compare(FP32, count, fixeddata);
TEST_ASSERT_MESSAGE(0 == test_result,
"Converted and expected areas did not match");
}
void test_FP32_DLF_1() { test_FP32_DLF(1); }
void test_FP32_DLF_4() { test_FP32_DLF(4); }
void test_FP32_DLF_7() { test_FP32_DLF(7); }
void test_FP32_DLF_8() { test_FP32_DLF(8); }
void test_FP32_DLF_9() { test_FP32_DLF(9); }
void test_FP32_DLF_15() { test_FP32_DLF(15); }
void test_FP32_DLF_63() { test_FP32_DLF(63); }
void test_FP32_DLF_64() { test_FP32_DLF(64); }
void test_16_DLF(zdnn_data_types type, int count) {
float_bit16 fixeddata[4096] = {0};
// Build a tensor data area of req'd type with random data
for (int i = 0; i < count; i++) {
float temp_float = 0;
GEN_RAND_FLOAT(temp_float, 3);
if (type == FP16) {
fixeddata[i] = cnvt_1_fp32_to_fp16(temp_float);
} else if (type == BFLOAT) {
fixeddata[i] = cnvt_1_fp32_to_bfloat(temp_float);
}
}
int test_result = convert_and_compare(type, count, fixeddata);
TEST_ASSERT_MESSAGE(0 == test_result,
"Converted and expected areas did not match");
}
void test_FP16_DLF_1() { test_16_DLF(FP16, 1); }
void test_FP16_DLF_7() { test_16_DLF(FP16, 7); }
void test_FP16_DLF_8() { test_16_DLF(FP16, 8); }
void test_FP16_DLF_9() { test_16_DLF(FP16, 9); }
void test_FP16_DLF_63() { test_16_DLF(FP16, 63); }
void test_FP16_DLF_64() { test_16_DLF(FP16, 64); }
void test_BFLOAT_DLF_1() { test_16_DLF(BFLOAT, 1); }
void test_BFLOAT_DLF_7() { test_16_DLF(BFLOAT, 7); }
void test_BFLOAT_DLF_8() { test_16_DLF(BFLOAT, 8); }
void test_BFLOAT_DLF_9() { test_16_DLF(BFLOAT, 9); }
void test_BFLOAT_DLF_63() { test_16_DLF(BFLOAT, 63); }
void test_BFLOAT_DLF_64() { test_16_DLF(BFLOAT, 64); }
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
int main() {
UNITY_BEGIN();
srand(time(0)); /* set up to get random values */
RUN_TEST(test_FP32_DLF_1);
RUN_TEST(test_FP32_DLF_4);
RUN_TEST(test_FP32_DLF_7);
RUN_TEST(test_FP32_DLF_8);
RUN_TEST(test_FP32_DLF_9);
RUN_TEST(test_FP32_DLF_15);
RUN_TEST(test_FP32_DLF_63);
RUN_TEST(test_FP32_DLF_64);
RUN_TEST(test_BFLOAT_DLF_1);
RUN_TEST(test_BFLOAT_DLF_7);
RUN_TEST(test_BFLOAT_DLF_8);
RUN_TEST(test_BFLOAT_DLF_9);
RUN_TEST(test_BFLOAT_DLF_63);
RUN_TEST(test_BFLOAT_DLF_64);
RUN_TEST(test_FP16_DLF_1);
RUN_TEST(test_FP16_DLF_7);
RUN_TEST(test_FP16_DLF_8);
RUN_TEST(test_FP16_DLF_9);
RUN_TEST(test_FP16_DLF_63);
RUN_TEST(test_FP16_DLF_64);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_get.c 0000664 0000000 0000000 00000011027 15000221702 0016612 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "testsupport.h"
void setUp(void) {}
void tearDown(void) {}
//=================================================================================================
// tests for zdnn_get_max_limit and zdnn_get_min_limit
void test_max_limit_dlf16_fp32() {
zdnn_status expected_status = ZDNN_OK;
float expected_value = DLF16_MAX_AS_FP32;
float my_data = 0;
zdnn_status return_status =
zdnn_get_max_limit(ZDNN_DLFLOAT16, FP32, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_FLOAT(expected_value, my_data);
}
void test_max_limit_dlf16_fp16() {
zdnn_status expected_status = ZDNN_OK;
uint16_t expected_value = FP16_MAX;
uint16_t my_data = 0;
zdnn_status return_status =
zdnn_get_max_limit(ZDNN_DLFLOAT16, FP16, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_UINT16(expected_value, my_data);
}
void test_max_limit_dlf16_bfloat() {
zdnn_status expected_status = ZDNN_OK;
uint16_t expected_value = DLF16_MAX_AS_BFLOAT;
uint16_t my_data = 0;
zdnn_status return_status =
zdnn_get_max_limit(ZDNN_DLFLOAT16, BFLOAT, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_UINT16(expected_value, my_data);
}
void test_min_limit_int8_fp32() {
zdnn_status expected_status = ZDNN_OK;
float expected_value = INT8_MIN_AS_FP32;
float my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT8, FP32, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_FLOAT(expected_value, my_data);
}
void test_min_limit_int8_fp16() {
zdnn_status expected_status = ZDNN_OK;
uint16_t expected_value = INT8_MIN_AS_FP16;
uint16_t my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT8, FP16, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_UINT16(expected_value, my_data);
}
void test_min_limit_int8_bfloat() {
zdnn_status expected_status = ZDNN_OK;
uint16_t expected_value = INT8_MIN_AS_BFLOAT;
uint16_t my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT8, BFLOAT, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_UINT16(expected_value, my_data);
}
void test_min_limit_int8_int8() {
zdnn_status expected_status = ZDNN_OK;
int8_t expected_value = INT8_MIN;
int8_t my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT8, INT8, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_INT8(expected_value, my_data);
}
void test_min_limit_int32_int32() {
zdnn_status expected_status = ZDNN_OK;
int32_t expected_value = INT32_MIN;
int32_t my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT32, INT32, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
TEST_ASSERT_EQUAL_INT32(expected_value, my_data);
}
void test_invalid_limit_int32_int8() {
zdnn_status expected_status = ZDNN_INVALID_TYPE;
int32_t my_data = 0;
zdnn_status return_status =
zdnn_get_min_limit(ZDNN_BINARY_INT32, INT8, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
}
void test_invalid_transformed_type() {
zdnn_status expected_status = ZDNN_INVALID_TYPE;
float my_data = 0;
zdnn_status return_status = zdnn_get_max_limit(999, FP32, &my_data);
TEST_ASSERT_EQUAL(expected_status, return_status);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(test_max_limit_dlf16_fp32);
RUN_TEST(test_max_limit_dlf16_fp16);
RUN_TEST(test_max_limit_dlf16_bfloat);
RUN_TEST(test_min_limit_int8_fp32);
RUN_TEST(test_min_limit_int8_fp16);
RUN_TEST(test_min_limit_int8_bfloat);
RUN_TEST(test_min_limit_int8_int8);
RUN_TEST(test_min_limit_int32_int32);
RUN_TEST(test_invalid_limit_int32_int8);
RUN_TEST(test_invalid_transformed_type);
return UNITY_END();
} zDNN-1.1.2/tests/testDriver_getoffset.c 0000664 0000000 0000000 00000021246 15000221702 0020025 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "testsupport.h"
void setUp(void) {}
void tearDown(void) {}
//=================================================================================================
// tests for get_stick_offset
void test_offset(uint32_t dim4, uint32_t dim3, uint32_t dim2, uint32_t dim1,
zdnn_data_layouts layout) {
zdnn_status status;
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim4,
dim3, dim2, dim1);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_generate_transformed_desc() returned %d \"%s\"",
status, zdnn_get_status_message(status));
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
size_t *correct_offset = alloc_offsets(&ztensor);
uint64_t *offsets_calculated =
malloc(sizeof(uint64_t) * get_num_elements(&ztensor, ELEMENTS_PRE));
uint64_t c = 0;
for (uint32_t e4x = 0; e4x < pre_tfrmd_desc.dim4; e4x++) {
for (uint32_t e3x = 0; e3x < pre_tfrmd_desc.dim3; e3x++) {
for (uint32_t e2x = 0; e2x < pre_tfrmd_desc.dim2; e2x++) {
for (uint32_t e1x = 0; e1x < pre_tfrmd_desc.dim1; e1x++) {
offsets_calculated[c] =
get_stick_offset(e4x, e3x, e2x, e1x, &pre_tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
offsets_calculated[c] == correct_offset[c],
"element (%d, %d, %d, %d) has wrong offset of %" PRIu64
", (expects %" PRIu64 ")",
e4x, e3x, e2x, e1x, offsets_calculated[c], correct_offset[c]);
c++;
}
}
}
}
free(offsets_calculated);
free(correct_offset);
}
// offsets for a 1,4,4,1,NHWC
void test_nhwc_1x4x4x1() { test_offset(1, 4, 4, 1, ZDNN_NHWC); }
void test_nhwc_1x2x2x4() { test_offset(1, 2, 2, 4, ZDNN_NHWC); }
// offsets for 1,32,32,3,NHWC
void test_nhwc_1x32x32x3() { test_offset(1, 32, 32, 3, ZDNN_NHWC); }
void test_nhwc_1x4x33x64() { test_offset(1, 4, 33, 64, ZDNN_NHWC); }
void test_nhwc_1x4x32x65() { test_offset(1, 4, 32, 65, ZDNN_NHWC); }
void test_nhwc_1x4x33x65() { test_offset(1, 4, 33, 65, ZDNN_NHWC); }
void test_nhwc_1x2x3x4() { test_offset(1, 2, 3, 4, ZDNN_NHWC); }
void test_nhwc_1x1x31x64() { test_offset(1, 1, 31, 64, ZDNN_NHWC); }
void test_nhwc_1x1x32x64() { test_offset(1, 1, 32, 64, ZDNN_NHWC); }
void test_nhwc_1x1x33x64() { test_offset(1, 1, 33, 64, ZDNN_NHWC); }
void test_nhwc_1x1x32x63() { test_offset(1, 1, 32, 63, ZDNN_NHWC); }
void test_nhwc_1x1x32x65() { test_offset(1, 1, 32, 65, ZDNN_NHWC); }
void test_nhwc_1x1x4x127() { test_offset(1, 1, 4, 127, ZDNN_NHWC); }
void test_nhwc_1x1x4x128() { test_offset(1, 1, 4, 128, ZDNN_NHWC); }
void test_nhwc_1x1x4x129() { test_offset(1, 1, 4, 129, ZDNN_NHWC); }
void test_nhwc_1x1x63x4() { test_offset(1, 1, 63, 4, ZDNN_NHWC); }
void test_nhwc_1x1x64x4() { test_offset(1, 1, 64, 4, ZDNN_NHWC); }
void test_nhwc_1x1x65x4() { test_offset(1, 1, 65, 4, ZDNN_NHWC); }
void test_nhwc_2x3x33x129() { test_offset(2, 3, 33, 129, ZDNN_NHWC); }
void test_nchw_1x1x4x4() { test_offset(1, 1, 4, 4, ZDNN_NCHW); }
void test_nchw_1x4x2x3() { test_offset(1, 4, 2, 3, ZDNN_NCHW); }
void test_nchw_1x3x32x32() { test_offset(1, 3, 32, 32, ZDNN_NCHW); }
void test_nchw_2x129x3x33() { test_offset(2, 129, 3, 33, ZDNN_NCHW); }
void test_nchw_1x64x1x31() { test_offset(1, 64, 1, 31, ZDNN_NCHW); }
void test_nchw_1x64x1x32() { test_offset(1, 64, 1, 32, ZDNN_NCHW); }
void test_nchw_1x64x1x33() { test_offset(1, 64, 1, 33, ZDNN_NCHW); }
void test_nchw_1x63x1x32() { test_offset(1, 63, 1, 32, ZDNN_NCHW); }
void test_nchw_1x65x1x32() { test_offset(1, 65, 1, 32, ZDNN_NCHW); }
void test_nchw_1x127x1x4() { test_offset(1, 127, 1, 4, ZDNN_NCHW); }
void test_nchw_1x128x1x4() { test_offset(1, 128, 1, 4, ZDNN_NCHW); }
void test_nchw_1x129x1x4() { test_offset(1, 129, 1, 4, ZDNN_NCHW); }
void test_nchw_1x4x1x63() { test_offset(1, 4, 1, 63, ZDNN_NCHW); }
void test_nchw_1x4x1x64() { test_offset(1, 4, 1, 64, ZDNN_NCHW); }
void test_nchw_1x4x1x65() { test_offset(1, 4, 1, 65, ZDNN_NCHW); }
void test_hwck_1x4x4x1() { test_offset(1, 4, 4, 1, ZDNN_HWCK); }
void test_hwck_1x2x3x4() { test_offset(1, 2, 3, 4, ZDNN_HWCK); }
void test_hwck_2x3x33x129() { test_offset(2, 3, 33, 129, ZDNN_HWCK); }
void test_hwck_1x32x32x3() { test_offset(1, 32, 32, 3, ZDNN_HWCK); }
void test_hwck_1x1x32x63() { test_offset(1, 1, 32, 63, ZDNN_HWCK); }
void test_hwck_1x1x31x64() { test_offset(1, 1, 31, 64, ZDNN_HWCK); }
void test_hwck_1x1x32x64() { test_offset(1, 1, 32, 64, ZDNN_HWCK); }
void test_hwck_1x1x33x64() { test_offset(1, 1, 33, 64, ZDNN_HWCK); }
void test_hwck_1x1x32x65() { test_offset(1, 1, 32, 65, ZDNN_HWCK); }
void test_hwck_1x1x4x127() { test_offset(1, 1, 4, 127, ZDNN_HWCK); }
void test_hwck_1x1x4x128() { test_offset(1, 1, 4, 128, ZDNN_HWCK); }
void test_hwck_1x1x4x129() { test_offset(1, 1, 4, 129, ZDNN_HWCK); }
void test_hwck_1x1x63x4() { test_offset(1, 1, 63, 4, ZDNN_HWCK); }
void test_hwck_1x1x64x4() { test_offset(1, 1, 64, 4, ZDNN_HWCK); }
void test_hwck_1x1x65x4() { test_offset(1, 1, 65, 4, ZDNN_HWCK); }
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x2x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x33x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x32x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x33x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x31x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x33x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x63x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x64x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x65x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x1x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x2x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x3x32x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_2x129x3x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x63x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x31);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x65x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x127x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x128x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x129x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x2x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x31x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x33x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x63x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x64x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x65x4);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_init_ztensor.c 0000664 0000000 0000000 00000125654 15000221702 0020576 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "testsupport.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
// Helper method for tests that check the boundaries of the maximum dim1 index.
// Concatenated ztensors introduce padding that must be determined to test this.
// See zdnn_generate_transformed_desc_concatenated() for the padding equation.
uint32_t max_concat_dim1(uint32_t num_concats) {
uint32_t temp = zdnn_get_max_for_dim(1) / num_concats;
uint32_t max_concat_dim1 = temp - (temp % AIU_2BYTE_CELLS_PER_STICK);
LOG_TRACE("returning %d\n", max_concat_dim1);
return max_concat_dim1;
}
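// Worked example of the rounding above (illustrative numbers only): if
// zdnn_get_max_for_dim(1) returned 8192 and num_concats were 3, temp would be
// 8192 / 3 = 2730; 2730 % AIU_2BYTE_CELLS_PER_STICK (64 is assumed here) is
// 42, so the returned max_concat_dim1 would be 2730 - 42 = 2688.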
// test if we can zdnn_init_ztensor_with_malloc() correctly with the supplied
// pre-transformed and transformed descriptors
void test_main(zdnn_tensor_desc *pre_tfrmd_desc, zdnn_tensor_desc *tfrmd_desc,
zdnn_concat_info info, uint64_t exp_size,
zdnn_status exp_status_allochelper) {
zdnn_ztensor ztensor;
zdnn_status status;
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status_allochelper,
"zdnn_init_ztensor_with_malloc() status is %08x (%s) "
"but expects %08x (%s) (concat info = %08x)",
status, zdnn_get_status_message(status), exp_status_allochelper,
zdnn_get_status_message(exp_status_allochelper), info);
// check and free the buffer, but only if zdnn_init_ztensor_with_malloc()
// was expected to work
if (exp_status_allochelper == ZDNN_OK) {
TEST_ASSERT_MESSAGE_FORMATTED(
ztensor.buffer_size == exp_size,
"zdnn_init_ztensor_with_malloc() returns incorrect size: %" PRIu64
" (expects %" PRIu64 ") (concat info = %08x)",
ztensor.buffer_size, exp_size, info);
zdnn_free_ztensor_buffer(&ztensor);
}
}
void test_normal(zdnn_tensor_desc *pre_tfrmd_desc, uint64_t exp_size) {
zdnn_tensor_desc tfrmd_desc;
zdnn_status status;
status = zdnn_generate_transformed_desc(pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK));
test_main(pre_tfrmd_desc, &tfrmd_desc, NO_CONCAT, exp_size, ZDNN_OK);
}
// test if we can zdnn_init_quantized_ztensor_with_malloc() correctly with the
// supplied pre-transformed and quantized transformed descriptors
void test_quantized_main(zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_tensor_desc *tfrmd_desc, float scale,
float offset, uint64_t exp_size,
zdnn_status exp_status_allochelper) {
zdnn_ztensor ztensor;
zdnn_status status;
status = zdnn_init_quantized_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc,
scale, offset, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status_allochelper,
"zdnn_init_quantized_ztensor_with_malloc() status is %08x (%s) "
"but expects %08x (%s)",
status, zdnn_get_status_message(status), exp_status_allochelper,
zdnn_get_status_message(exp_status_allochelper));
// check and free the buffer, but only if
// zdnn_init_quantized_ztensor_with_malloc() was expected to work
if (exp_status_allochelper == ZDNN_OK) {
TEST_ASSERT_MESSAGE_FORMATTED(ztensor.buffer_size == exp_size,
"zdnn_init_quantized_ztensor_with_malloc() "
"returns incorrect size: %" PRIu64
" (expects %" PRIu64 ")",
ztensor.buffer_size, exp_size);
zdnn_free_ztensor_buffer(&ztensor);
}
}
void test_quantized(zdnn_quantized_transform_types type, unsigned int n,
unsigned int h, unsigned int w, unsigned int c, float scale,
float offset, uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &pre_tfrmd_desc, n,
h, w, c);
zdnn_tensor_desc tfrmd_desc;
zdnn_status status;
status = zdnn_generate_quantized_transformed_desc(&pre_tfrmd_desc, type,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK));
test_quantized_main(&pre_tfrmd_desc, &tfrmd_desc, scale, offset, exp_size,
ZDNN_OK);
}
/// Drive the creation of a FICO/ZRH ztensor with the provided pre-transformed
/// layout, data type and dims, and transformed layout (FICO/ZRH). Then drive
/// allocation and compare to an expected value.
///
/// \param[in] pre_tfrmd_layout pre-transformed layout
/// \param[in] info concatenation info
/// \param[in] exp_size expected allocation size
/// \param[in] exp_status_gen_concat expected status of _desc_concatenated()
/// \param[in] exp_status_allochelper expected status of _allochelper()
/// \param[in] ... dimensions, outermost -> innermost
/// order (ie shape order)
///
/// \return None - Fails test assertion if actual values don't match specified
/// exp values
///
void test_concat(zdnn_data_layouts pre_tfrmd_layout, zdnn_concat_info info,
uint64_t exp_size, zdnn_status exp_status_gen_concat,
zdnn_status exp_status_allochelper, ...) {
zdnn_status status;
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
uint32_t num_things;
switch (pre_tfrmd_layout) {
case ZDNN_2DS:
case ZDNN_3DS:
num_things = get_data_layout_dims(pre_tfrmd_layout);
break;
default: // for driving an "invalid layout" testcase
num_things = 4;
break;
}
va_list v_list;
va_start(v_list, exp_status_allochelper);
uint32_t dim_nums[num_things];
for (uint32_t i = 0; i < num_things; i++) {
dim_nums[i] = va_arg(v_list, uint32_t);
}
va_end(v_list);
switch (pre_tfrmd_layout) {
case ZDNN_2DS:
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, dim_nums[0], dim_nums[1]);
break;
case ZDNN_3DS:
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, dim_nums[0], dim_nums[1],
dim_nums[2]);
break;
default:
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, dim_nums[0], dim_nums[1],
dim_nums[2], dim_nums[3]);
break;
}
status = zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status_gen_concat,
"zdnn_generate_transformed_desc_concatenated() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), exp_status_gen_concat,
zdnn_get_status_message(exp_status_gen_concat));
// do the rest if expected zdnn_generate_transformed_desc_concatenated() to
// work
if (exp_status_gen_concat == ZDNN_OK) {
test_main(&pre_tfrmd_desc, &tfrmd_desc, info, exp_size,
exp_status_allochelper);
}
}
void test_NHWC(unsigned int n, unsigned int h, unsigned int w, unsigned int c,
uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &pre_tfrmd_desc, n,
h, w, c);
test_normal(&pre_tfrmd_desc, exp_size);
}
void test_2D(unsigned int dim2, unsigned int dim1, uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_2D, test_datatype, &pre_tfrmd_desc, dim2,
dim1);
test_normal(&pre_tfrmd_desc, exp_size);
}
/// Drive the creation of a tensor descriptor with the layout
/// ZDNN_2DS and passed-in dimensions. This will then call
/// the test_normal function to drive allocation and compare to
/// an expected value.
///
/// \param[in] dim2 dimension 2
/// \param[in] dim1 dimension 1
/// \param[in] exp_size expected allocation size
///
/// \return None
///
void test_2DS(uint32_t dim2, uint32_t dim1, uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_2DS, test_datatype, &pre_tfrmd_desc, dim2,
dim1);
test_normal(&pre_tfrmd_desc, exp_size);
}
/// Drive the creation of a tensor descriptor with the layout
/// ZDNN_3DS and passed-in dimensions. This will then call
/// the test_normal function to drive allocation and compare to
/// an expected value.
///
/// \param[in] dim3 dimension 3
/// \param[in] dim2 dimension 2
/// \param[in] dim1 dimension 1
/// \param[in] exp_size expected allocation size
///
/// \return None
///
void test_3DS(uint32_t dim3, uint32_t dim2, uint32_t dim1, uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_3DS, test_datatype, &pre_tfrmd_desc, dim3,
dim2, dim1);
test_normal(&pre_tfrmd_desc, exp_size);
}
void test_NHWC_1x3x3x5() { test_NHWC(1, 3, 3, 5, 12288); }
void test_NHWC_5x32x32x3() { test_NHWC(5, 32, 32, 3, 655360); }
void test_NHWC_1x64x64x64() { test_NHWC(1, 64, 64, 64, 524288); }
void test_NHWC_1x8x8x1() { test_NHWC(1, 8, 8, 1, 32768); }
void test_NHWC_1x256x256x1() { test_NHWC(1, 256, 256, 1, 8388608); }
void test_NHWC_1x1x256x1() { test_NHWC(1, 1, 256, 1, 32768); }
// Different quantized types have different cells per stick. Focus on innermost
// dimension limits.
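// As the expected sizes below show, the allocation doubles once dim1 crosses
// a stick boundary: 64 -> 65 for QUANTIZED_DLFLOAT16 and
// QUANTIZED_WEIGHTS_INT8, and 128 -> 129 for QUANTIZED_INT8.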
void test_quantized_DLFLOAT_1x3x3x5() {
test_quantized(QUANTIZED_DLFLOAT16, 1, 3, 3, 5, 5, 6, 12288);
}
void test_quantized_DLFLOAT_1x3x3x64() {
test_quantized(QUANTIZED_DLFLOAT16, 1, 3, 3, 64, 7, 8, 12288);
}
void test_quantized_DLFLOAT_1x3x3x65() {
test_quantized(QUANTIZED_DLFLOAT16, 1, 3, 3, 65, 9, 10, 24576);
}
void test_quantized_INT8_1x3x3x5() {
test_quantized(QUANTIZED_INT8, 1, 3, 3, 5, 5, 6, 12288);
}
void test_quantized_INT8_1x3x3x128() {
test_quantized(QUANTIZED_INT8, 1, 3, 3, 128, 7, 8, 12288);
}
void test_quantized_INT8_1x3x3x129() {
test_quantized(QUANTIZED_INT8, 1, 3, 3, 129, 9, 10, 24576);
}
void test_quantized_WEIGHTS_INT8_1x3x3x5() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 3, 5, 5, 6, 12288);
}
void test_quantized_WEIGHTS_INT8_1x3x3x64() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 3, 64, 7, 8, 12288);
}
void test_quantized_WEIGHTS_INT8_1x3x3x65() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 3, 65, 9, 10, 24576);
}
void test_quantized_WEIGHTS_INT8_1x3x32x64() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 32, 64, 9, 10, 12288);
}
void test_quantized_WEIGHTS_INT8_1x3x33x64() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 33, 64, 9, 10, 12288);
}
void test_quantized_WEIGHTS_INT8_1x3x64x64() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 64, 64, 9, 10, 12288);
}
void test_quantized_WEIGHTS_INT8_1x3x65x64() {
test_quantized(QUANTIZED_WEIGHTS_INT8, 1, 3, 65, 64, 9, 10, 24576);
}
// TODO, will need to drive INT32 scenarios
void test_2D_8x8() { test_2D(8, 8, 4096); }
void test_2DS_1x8() { test_2DS(1, 8, 4096); }
void test_2DS_8x1() { test_2DS(8, 1, 32768); }
void test_2DS_8x8() { test_2DS(8, 8, 32768); }
void test_2DS_32x8() { test_2DS(32, 8, 131072); }
void test_2DS_64x8() { test_2DS(64, 8, 262144); }
void test_2DS_64x64() { test_2DS(64, 64, 262144); }
void test_2DS_256x32() { test_2DS(256, 32, 1048576); }
void test_2DS_256x256() { test_2DS(256, 256, 4194304); }
void test_3DS_1x8x1() { test_3DS(1, 8, 1, 4096); }
void test_3DS_8x8x1() { test_3DS(8, 8, 1, 32768); }
void test_3DS_8x8x8() { test_3DS(8, 8, 8, 32768); }
void test_3DS_16x32x8() { test_3DS(16, 32, 8, 65536); }
void test_3DS_16x64x8() { test_3DS(16, 64, 8, 131072); }
void test_3DS_16x256x32() { test_3DS(16, 256, 32, 524288); }
void test_3DS_16x64x64() { test_3DS(16, 64, 64, 131072); }
void test_3DS_16x256x256() { test_3DS(16, 256, 256, 2097152); }
//------------------------------------------------------------
// any combination of PREV_ UNI/BIDIR + BIASES/HIDDEN_BIASES should yield the
// same results
void test_lstm_biases_1x8() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
16384, ZDNN_OK, ZDNN_OK, 1, 8);
}
}
}
void test_lstm_biases_2x32() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
32768, ZDNN_OK, ZDNN_OK, 2, 32);
}
}
}
void test_lstm_biases_1x64() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
16384, ZDNN_OK, ZDNN_OK, 1, 64);
}
}
}
void test_lstm_biases_2x70() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
65536, ZDNN_OK, ZDNN_OK, 2, 70);
}
}
}
void test_lstm_biases_1x128() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
32768, ZDNN_OK, ZDNN_OK, 1, 128);
}
}
}
void test_lstm_biases_2x150() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j],
98304, ZDNN_OK, ZDNN_OK, 2, 150);
}
}
}
//------------------------------------------------------------
// PREV_ UNI/BIDIR + HIDDEN_WEIGHTS and UNI + WEIGHTS should yield the same
// results
void test_lstm_no_vconcat_weights_1x2x8() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 16384, ZDNN_OK,
ZDNN_OK, 1, 2, 8);
}
}
void test_lstm_no_vconcat_weights_2x5x32() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 32768, ZDNN_OK,
ZDNN_OK, 2, 5, 32);
}
}
void test_lstm_no_vconcat_weights_1x3x64() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 16384, ZDNN_OK,
ZDNN_OK, 1, 3, 64);
}
}
void test_lstm_no_vconcat_weights_2x10x70() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 65536, ZDNN_OK,
ZDNN_OK, 2, 10, 70);
}
}
void test_lstm_no_vconcat_weights_1x34x128() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 65536, ZDNN_OK,
ZDNN_OK, 1, 34, 128);
}
}
void test_lstm_no_vconcat_weights_2x50x150() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i], 196608, ZDNN_OK,
ZDNN_OK, 2, 50, 150);
}
}
//------------------------------------------------------------
// lstm_prev_bidir_weights expected size:
// dim3 * (2 * PADDED(dim2/2) / AIU_STICKS_PER_PAGE) *
// ceil(dim1/AIU_2BYTE_CELLS_PER_STICK) *
//   AIU_PAGESIZE_IN_BYTES * 4
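// For example, doubling dim3 doubles the expected size (65536 bytes for
// 1x2x8 below vs 131072 for 2x2x8), matching the leading dim3 factor, and
// crossing the 64-cell stick boundary in dim1 (1x6x64 at 65536 vs 1x10x70 at
// 131072) doubles it as well, consistent with the
// ceil(dim1/AIU_2BYTE_CELLS_PER_STICK) term.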
void test_lstm_prev_bidir_weights_1x2x8() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 2, 8);
}
void test_lstm_prev_bidir_weights_2x2x8() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 2, 8);
}
void test_lstm_prev_bidir_weights_1x34x8() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 34, 8);
}
void test_lstm_prev_bidir_weights_2x34x8() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 34, 8);
}
void test_lstm_prev_bidir_weights_1x64x10() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 64, 10);
}
void test_lstm_prev_bidir_weights_2x64x10() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 64, 10);
}
void test_lstm_prev_bidir_weights_1x70x20() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 70, 20);
}
void test_lstm_prev_bidir_weights_2x70x20() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 70, 20);
}
void test_lstm_prev_bidir_weights_1x10x32() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 10, 32);
}
void test_lstm_prev_bidir_weights_2x10x32() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 10, 32);
}
void test_lstm_prev_bidir_weights_1x6x64() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 65536,
ZDNN_OK, ZDNN_OK, 1, 6, 64);
}
void test_lstm_prev_bidir_weights_2x6x64() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 2, 6, 64);
}
void test_lstm_prev_bidir_weights_1x10x70() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 1, 10, 70);
}
void test_lstm_prev_bidir_weights_2x10x70() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
262144, ZDNN_OK, ZDNN_OK, 2, 10, 70);
}
void test_lstm_prev_bidir_weights_1x34x128() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
131072, ZDNN_OK, ZDNN_OK, 1, 34, 128);
}
void test_lstm_prev_bidir_weights_2x34x128() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
262144, ZDNN_OK, ZDNN_OK, 2, 34, 128);
}
void test_lstm_prev_bidir_weights_1x50x150() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
196608, ZDNN_OK, ZDNN_OK, 1, 50, 150);
}
void test_lstm_prev_bidir_weights_2x50x150() {
test_concat(ZDNN_3DS, RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
393216, ZDNN_OK, ZDNN_OK, 2, 50, 150);
}
//------------------------------------------------------------
void test_CONCAT_LSTM_fail_unsupported_layout() {
// bad layout: ZDNN_4D as pre-transformed yields ZDNN_INVALID_LAYOUT
test_concat(ZDNN_4D, RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS, 0,
ZDNN_INVALID_LAYOUT, 0, 1, 2, 3, 4);
}
void test_CONCAT_LSTM_max_dim1() {
// Confirm we pass when at the maximum number of dim1 elements
// LSTM concatenates 4 gates.
uint32_t max_dim1 = max_concat_dim1(4);
// If MDnIS exists, use larger number; otherwise keep Telum I value.
uint64_t expected_size =
nnpa_query_result.max_dim1_index_size ? 134217728 : 2097152;
test_concat(ZDNN_2DS, USAGE_BIASES | RNN_TYPE_LSTM | PREV_LAYER_UNI,
expected_size, ZDNN_OK, ZDNN_OK, 1, max_dim1);
}
void test_CONCAT_LSTM_fail_dim1_too_big() {
// zdnn_generate_transformed_desc_concatenated() yields no error but
  // zdnn_allochelper() yields ZDNN_INVALID_SHAPE during its checks.
// LSTM concatenates 4 gates.
uint32_t max_dim1 = max_concat_dim1(4);
test_concat(ZDNN_2DS, USAGE_BIASES | RNN_TYPE_LSTM | PREV_LAYER_UNI, 0,
ZDNN_OK, ZDNN_INVALID_SHAPE, 1, max_dim1 + 1);
}
//------------------------------------------------------------
// test_gru_* tests are based on test_lstm_*, with smaller expected sizes
// ( = 3/4 of test_lstm_*'s, since GRU concatenates 3 gates instead of 4 )
void test_gru_biases_1x8() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
12288, ZDNN_OK, ZDNN_OK, 1, 8);
}
}
}
void test_gru_biases_2x32() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
24576, ZDNN_OK, ZDNN_OK, 2, 32);
}
}
}
void test_gru_biases_1x64() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
12288, ZDNN_OK, ZDNN_OK, 1, 64);
}
}
}
void test_gru_biases_2x70() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
49152, ZDNN_OK, ZDNN_OK, 2, 70);
}
}
}
void test_gru_biases_1x128() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
24576, ZDNN_OK, ZDNN_OK, 1, 128);
}
}
}
void test_gru_biases_2x150() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat(ZDNN_2DS, RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
73728, ZDNN_OK, ZDNN_OK, 2, 150);
}
}
}
//------------------------------------------------------------
// PREV_ UNI/BIDIR + HIDDEN_WEIGHTS and UNI + WEIGHTS should yield the same
// results
void test_gru_no_vconcat_weights_1x2x8() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 12288, ZDNN_OK,
ZDNN_OK, 1, 2, 8);
}
}
void test_gru_no_vconcat_weights_2x5x32() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 24576, ZDNN_OK,
ZDNN_OK, 2, 5, 32);
}
}
void test_gru_no_vconcat_weights_1x3x64() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 12288, ZDNN_OK,
ZDNN_OK, 1, 3, 64);
}
}
void test_gru_no_vconcat_weights_2x10x70() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 49152, ZDNN_OK,
ZDNN_OK, 2, 10, 70);
}
}
void test_gru_no_vconcat_weights_1x34x128() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 49152, ZDNN_OK,
ZDNN_OK, 1, 34, 128);
}
}
void test_gru_no_vconcat_weights_2x50x150() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i], 147456, ZDNN_OK,
ZDNN_OK, 2, 50, 150);
}
}
//------------------------------------------------------------
// gru_prev_bidir_weights expected size:
// dim3 * (2 * PADDED(dim2/2) / AIU_STICKS_PER_PAGE) *
// ceil(dim1/AIU_2BYTE_CELLS_PER_STICK) *
//   AIU_PAGESIZE_IN_BYTES * 3
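// The GRU sizes below are 3/4 of the corresponding LSTM sizes above (3 gates
// instead of 4), e.g. 49152 bytes for 1x2x8 vs the LSTM's 65536.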
void test_gru_prev_bidir_weights_1x2x8() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 2, 8);
}
void test_gru_prev_bidir_weights_2x2x8() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 2, 8);
}
void test_gru_prev_bidir_weights_1x34x8() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 34, 8);
}
void test_gru_prev_bidir_weights_2x34x8() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 34, 8);
}
void test_gru_prev_bidir_weights_1x64x10() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 64, 10);
}
void test_gru_prev_bidir_weights_2x64x10() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 64, 10);
}
void test_gru_prev_bidir_weights_1x70x20() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 70, 20);
}
void test_gru_prev_bidir_weights_2x70x20() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 70, 20);
}
void test_gru_prev_bidir_weights_1x10x32() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 10, 32);
}
void test_gru_prev_bidir_weights_2x10x32() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 10, 32);
}
void test_gru_prev_bidir_weights_1x6x64() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 49152,
ZDNN_OK, ZDNN_OK, 1, 6, 64);
}
void test_gru_prev_bidir_weights_2x6x64() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 2, 6, 64);
}
void test_gru_prev_bidir_weights_1x10x70() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 1, 10, 70);
}
void test_gru_prev_bidir_weights_2x10x70() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 196608,
ZDNN_OK, ZDNN_OK, 2, 10, 70);
}
void test_gru_prev_bidir_weights_1x34x128() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 98304,
ZDNN_OK, ZDNN_OK, 1, 34, 128);
}
void test_gru_prev_bidir_weights_2x34x128() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 196608,
ZDNN_OK, ZDNN_OK, 2, 34, 128);
}
void test_gru_prev_bidir_weights_1x50x150() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 147456,
ZDNN_OK, ZDNN_OK, 1, 50, 150);
}
void test_gru_prev_bidir_weights_2x50x150() {
test_concat(ZDNN_3DS, RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 294912,
ZDNN_OK, ZDNN_OK, 2, 50, 150);
}
//------------------------------------------------------------
void test_CONCAT_GRU_fail_unsupported_layout() {
// bad layout: ZDNN_4D as pre-transformed yields ZDNN_INVALID_LAYOUT
  test_concat(ZDNN_4D, RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_WEIGHTS, 0,
              ZDNN_INVALID_LAYOUT, 0, 1, 2, 3, 4);
}
void test_CONCAT_GRU_max_dim1() {
// Confirm we pass when at the maximum number of dim1 elements
// GRU concatenates 3 gates.
uint64_t max_dim1 = max_concat_dim1(3);
// If MDnIS exists, use larger number; otherwise keep Telum I value.
uint64_t expected_size =
nnpa_query_result.max_dim1_index_size ? 134209536 : 2088960;
test_concat(ZDNN_2DS, RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_BIASES,
expected_size, ZDNN_OK, ZDNN_OK, 1, max_dim1);
}
void test_CONCAT_GRU_fail_dim1_too_big() {
// zdnn_generate_transformed_desc_concatenated() yields no error but
  // zdnn_allochelper() yields ZDNN_INVALID_SHAPE during its checks.
// GRU concatenates 3 gates.
uint64_t max_dim1 = max_concat_dim1(3);
test_concat(ZDNN_2DS, RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_BIASES, 0,
ZDNN_OK, ZDNN_INVALID_SHAPE, 1, max_dim1 + 1);
}
//------------------------------------------------------------
void test_rnn_output(uint32_t dim4, uint32_t dim3, uint32_t dim2, uint32_t dim1,
uint64_t exp_size) {
zdnn_tensor_desc pre_tfrmd_desc;
zdnn_init_pre_transformed_desc(ZDNN_4DS, test_datatype, &pre_tfrmd_desc, dim4,
dim3, dim2, dim1);
test_normal(&pre_tfrmd_desc, exp_size);
}
void test_uni_output_1x1x2x8() { test_rnn_output(1, 1, 2, 8, 4096); }
void test_uni_output_2x1x5x32() { test_rnn_output(2, 1, 5, 32, 8192); }
void test_uni_output_1x1x3x64() { test_rnn_output(1, 1, 3, 64, 4096); }
void test_uni_output_2x1x10x70() { test_rnn_output(2, 1, 10, 70, 16384); }
void test_uni_output_1x1x34x128() { test_rnn_output(1, 1, 34, 128, 16384); }
void test_uni_output_2x1x50x150() { test_rnn_output(2, 1, 50, 150, 49152); }
void test_bidir_output_1x2x2x8() { test_rnn_output(1, 2, 2, 8, 8192); }
void test_bidir_output_2x2x5x32() { test_rnn_output(2, 2, 5, 32, 16384); }
void test_bidir_output_1x2x3x64() { test_rnn_output(1, 2, 3, 64, 8192); }
void test_bidir_output_2x2x10x70() { test_rnn_output(2, 2, 10, 70, 32768); }
void test_bidir_output_1x2x34x128() { test_rnn_output(1, 2, 34, 128, 32768); }
void test_bidir_output_2x2x50x150() { test_rnn_output(2, 2, 50, 150, 98304); }
//------------------------------------------------------------
void test_bidir_output_max_dim1() {
// Confirm we pass when at the maximum number of dim1 elements
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
uint64_t max_dim1 = max_concat_dim1(2);
zdnn_init_pre_transformed_desc(ZDNN_4DS, test_datatype, &pre_tfrmd_desc, 1, 2,
2, max_dim1);
zdnn_status status =
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK));
// If MDnIS exists, use larger number; otherwise keep Telum I value.
uint64_t expected_size =
nnpa_query_result.max_dim1_index_size ? 134217728 : 2097152;
test_main(&pre_tfrmd_desc, &tfrmd_desc, NO_CONCAT, expected_size, ZDNN_OK);
}
void test_bidir_output_fail_dim1_too_big() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
uint64_t max_dim1 = max_concat_dim1(2);
zdnn_init_pre_transformed_desc(ZDNN_4DS, test_datatype, &pre_tfrmd_desc, 1, 2,
3, max_dim1 + 1);
  // zdnn_generate_transformed_desc() yields no error but
  // zdnn_allochelper() yields ZDNN_INVALID_SHAPE during its checks.
zdnn_status status =
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK));
test_main(&pre_tfrmd_desc, &tfrmd_desc, NO_CONCAT, 9999, ZDNN_INVALID_SHAPE);
}
void test_zdnn_init_ztensor_function() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
// Set ztensor to all 1s prior to function call.
memset(&ztensor, 1, sizeof(ztensor));
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE(
ztensor.pre_transformed_desc == &pre_tfrmd_desc,
"Expected ztensor to point to passed in pre-transformed descriptor.");
TEST_ASSERT_MESSAGE(
ztensor.transformed_desc == &tfrmd_desc,
"Expected ztensor to point to passed in transformed descriptor.");
TEST_ASSERT_MESSAGE(
false == ztensor.is_transformed,
"Expected ztensor to have is_transformed initialized as false.");
// We expect reserved area to be all zeros, create variable for memcmp
char expected_reserved[sizeof(ztensor.reserved)] = {0};
TEST_ASSERT_MESSAGE(
memcmp(expected_reserved, ztensor.reserved, sizeof(expected_reserved)) ==
0,
"Expected ztensor reserved area not initialized to zeroes.");
// We expect reserved2 area to be all zeros, create variable for memcmp
char expected_reserved2[sizeof(ztensor.reserved2)] = {0};
TEST_ASSERT_MESSAGE(
memcmp(expected_reserved2, ztensor.reserved2,
sizeof(expected_reserved2)) == 0,
"Expected ztensor reserved2 area not initialized to zeroes.");
}
void test_zdnn_init_ztensor_via_malloc_function() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
  // Create very basic descriptors to satisfy the malloc portion of the init
  // function
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
// Set ztensor to all 1s prior to function call.
memset(&ztensor, 1, sizeof(ztensor));
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE(
ztensor.pre_transformed_desc == &pre_tfrmd_desc,
"Expected ztensor to point to passed in pre-transformed descriptor.");
TEST_ASSERT_MESSAGE(
ztensor.transformed_desc == &tfrmd_desc,
"Expected ztensor to point to passed in transformed descriptor.");
TEST_ASSERT_MESSAGE(
false == ztensor.is_transformed,
"Expected ztensor to have is_transformed initialized as false.");
// We expect reserved area to be all zeros, create variable for memcmp
char expected_reserved[sizeof(ztensor.reserved)] = {0};
TEST_ASSERT_MESSAGE(
memcmp(expected_reserved, ztensor.reserved, sizeof(expected_reserved)) ==
0,
"Expected ztensor reserved area not initialized to zeroes.");
// We expect reserved2 area to be all zeros, create variable for memcmp
char expected_reserved2[sizeof(ztensor.reserved2)] = {0};
TEST_ASSERT_MESSAGE(
memcmp(expected_reserved2, ztensor.reserved2,
sizeof(expected_reserved2)) == 0,
"Expected ztensor reserved2 area not initialized to zeroes.");
zdnn_free_ztensor_buffer(&ztensor);
}
void test_zdnn_is_quantized_ztensor_scale() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
ztensor.rec_scale = 0.2;
TEST_ASSERT_MESSAGE(zdnn_is_quantized_ztensor(&ztensor) == true,
"Expected ztensor not indicated as a quantized ztensor.");
}
void test_zdnn_is_quantized_ztensor_false() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
ztensor.rec_scale = 0;
TEST_ASSERT_MESSAGE(zdnn_is_quantized_ztensor(&ztensor) == false,
"Expected ztensor indicated as a quantized ztensor.");
}
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_1x3x3x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_5x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_1x64x64x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_1x8x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_1x256x256x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_NHWC_1x1x256x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_DLFLOAT_1x3x3x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_DLFLOAT_1x3x3x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_DLFLOAT_1x3x3x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_INT8_1x3x3x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_INT8_1x3x3x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_quantized_INT8_1x3x3x129);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x3x5);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x3x64);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x3x65);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x32x64);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x33x64);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x64x64);
RUN_TEST(test_quantized_WEIGHTS_INT8_1x3x65x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2D_8x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_1x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_8x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_32x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_64x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_256x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_64x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2DS_256x256);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_1x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_8x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_8x8x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_16x32x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_16x64x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_16x256x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_16x64x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3DS_16x256x256);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_1x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_1x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_1x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_2x5x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x34x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x34x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x64x10);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x64x10);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x70x20);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x70x20);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x10x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x10x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_1x3x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_2x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_1x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_2x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x6x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x6x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_CONCAT_LSTM_max_dim1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_CONCAT_LSTM_fail_unsupported_layout);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_CONCAT_LSTM_fail_dim1_too_big);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_1x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_1x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_1x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_2x5x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x34x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x34x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x64x10);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x64x10);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x70x20);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x70x20);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x10x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x10x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_1x3x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_2x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_1x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_2x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x6x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x6x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_CONCAT_GRU_max_dim1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_CONCAT_GRU_fail_unsupported_layout);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_CONCAT_GRU_fail_dim1_too_big);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_1x1x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_2x1x5x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_1x2x2x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_2x2x5x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_1x1x3x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_2x1x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_1x1x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_uni_output_2x1x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_1x2x3x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_2x2x10x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_1x2x34x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_2x2x50x150);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_max_dim1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_bidir_output_fail_dim1_too_big);
RUN_TEST(test_zdnn_init_ztensor_function);
RUN_TEST(test_zdnn_init_ztensor_via_malloc_function);
RUN_TEST(test_zdnn_is_quantized_ztensor_scale);
RUN_TEST(test_zdnn_is_quantized_ztensor_false);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_logger.c 0000664 0000000 0000000 00000015325 15000221702 0017317 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "testsupport.h"
char msg_trace[] = "TRACE";
char msg_debug[] = "DEBUG";
char msg_info[] = "INFO";
char msg_warn[] = "WARN";
char msg_error[] = "ERROR";
char msg_fatal[] = "FATAL";
void setUp(void) {
#ifndef ZDNN_CONFIG_DEBUG
TEST_IGNORE_MESSAGE(
"ZDNN_CONFIG_DEBUG not set. Unable to test full logger. Skip tests.");
#endif
}
void tearDown(void) {}
void try_log(uint32_t loglvl) {
// override whatever ZDNN_LOGLEVEL/ZDNN_LOGMODULE are set in env
log_level = loglvl;
log_module[0] = '\0';
char buf_stdout[BUFSIZ] = {0};
char buf_stderr[BUFSIZ] = {0};
stdout_to_pipe();
stderr_to_pipe();
LOG_TRACE(msg_trace, NO_ARG);
LOG_DEBUG(msg_debug, NO_ARG);
LOG_INFO(msg_info, NO_ARG);
LOG_WARN(msg_warn, NO_ARG);
LOG_ERROR(msg_error, NO_ARG);
LOG_FATAL(msg_fatal, NO_ARG);
restore_stdout(buf_stdout, BUFSIZ);
restore_stderr(buf_stderr, BUFSIZ);
#define EXPECTS_ONLY_STDOUT(msg) \
if (strstr(buf_stdout, msg) == NULL) { \
TEST_FAIL_MESSAGE("can't find " #msg " message in STDOUT"); \
} \
if (strstr(buf_stderr, msg) != NULL) { \
TEST_FAIL_MESSAGE("found " #msg " message unexpectedly STDERR"); \
}
#define EXPECTS_ONLY_STDERR(msg) \
if (strstr(buf_stderr, msg) == NULL) { \
TEST_FAIL_MESSAGE("can't find " #msg " message in STDERR"); \
} \
if (strstr(buf_stdout, msg) != NULL) { \
TEST_FAIL_MESSAGE("found " #msg " message unexpectedly STDOUT"); \
}
#define EXPECTS_NEITHER(msg) \
if (strstr(buf_stdout, msg) != NULL) { \
TEST_FAIL_MESSAGE("found " #msg " message unexpectedly STDOUT"); \
} \
if (strstr(buf_stderr, msg) != NULL) { \
TEST_FAIL_MESSAGE("found " #msg " message unexpectedly STDERR"); \
}
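  // Expected routing, per the cases below: only messages at or above the
  // active log level are emitted; TRACE/DEBUG/INFO/WARN go to stdout while
  // ERROR/FATAL go to stderr.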
switch (loglvl) {
case (LOGLEVEL_TRACE):
EXPECTS_ONLY_STDOUT(msg_trace);
EXPECTS_ONLY_STDOUT(msg_debug);
EXPECTS_ONLY_STDOUT(msg_info);
EXPECTS_ONLY_STDOUT(msg_warn);
EXPECTS_ONLY_STDERR(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_DEBUG):
EXPECTS_NEITHER(msg_trace);
EXPECTS_ONLY_STDOUT(msg_debug);
EXPECTS_ONLY_STDOUT(msg_info);
EXPECTS_ONLY_STDOUT(msg_warn);
EXPECTS_ONLY_STDERR(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_INFO):
EXPECTS_NEITHER(msg_trace);
EXPECTS_NEITHER(msg_debug);
EXPECTS_ONLY_STDOUT(msg_info);
EXPECTS_ONLY_STDOUT(msg_warn);
EXPECTS_ONLY_STDERR(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_WARN):
EXPECTS_NEITHER(msg_trace);
EXPECTS_NEITHER(msg_debug);
EXPECTS_NEITHER(msg_info);
EXPECTS_ONLY_STDOUT(msg_warn);
EXPECTS_ONLY_STDERR(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_ERROR):
EXPECTS_NEITHER(msg_trace);
EXPECTS_NEITHER(msg_debug);
EXPECTS_NEITHER(msg_info);
EXPECTS_NEITHER(msg_warn);
EXPECTS_ONLY_STDERR(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_FATAL):
EXPECTS_NEITHER(msg_trace);
EXPECTS_NEITHER(msg_debug);
EXPECTS_NEITHER(msg_info);
EXPECTS_NEITHER(msg_warn);
EXPECTS_NEITHER(msg_error);
EXPECTS_ONLY_STDERR(msg_fatal);
break;
case (LOGLEVEL_OFF):
EXPECTS_NEITHER(msg_trace);
EXPECTS_NEITHER(msg_debug);
EXPECTS_NEITHER(msg_info);
EXPECTS_NEITHER(msg_warn);
EXPECTS_NEITHER(msg_error);
EXPECTS_NEITHER(msg_fatal);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("Invalid log level %u", loglvl);
}
}
void test_off(void) { try_log(LOGLEVEL_OFF); }
void test_fatal(void) { try_log(LOGLEVEL_FATAL); }
void test_err0r(void) { try_log(LOGLEVEL_ERROR); } // "error" confuses jenkins
void test_warn(void) { try_log(LOGLEVEL_WARN); }
void test_info(void) { try_log(LOGLEVEL_INFO); }
void test_debug(void) { try_log(LOGLEVEL_DEBUG); }
void test_trace(void) { try_log(LOGLEVEL_TRACE); }
// log_module with only "testDriver_logger.c" in it
void test_in_logmodule() {
log_level = LOGLEVEL_INFO;
strncpy(log_module, __FILE__, LOGMODULE_SIZE);
char buf_stdout[BUFSIZ] = {0};
stdout_to_pipe();
LOG_INFO(msg_info, NO_ARG);
restore_stdout(buf_stdout, BUFSIZ);
if (strstr(buf_stdout, msg_info) == NULL) {
TEST_FAIL_MESSAGE("can't find message message in STDOUT");
}
fflush(stdout);
}
// log_module with "testDriver_logger.c" somewhere in the string
void test_in_logmodule2() {
log_level = LOGLEVEL_INFO;
strncpy(log_module, "fafafa.c " __FILE__ " lalala.c", LOGMODULE_SIZE);
char buf_stdout[BUFSIZ] = {0};
stdout_to_pipe();
LOG_INFO(msg_info, NO_ARG);
restore_stdout(buf_stdout, BUFSIZ);
if (strstr(buf_stdout, msg_info) == NULL) {
TEST_FAIL_MESSAGE("can't find message message in STDOUT");
}
fflush(stdout);
}
// log_module with "testDriver_logger.c" completely not in
void test_not_in_logmodule() {
log_level = LOGLEVEL_INFO;
strncpy(log_module, "hahahahaha.c", LOGMODULE_SIZE);
char buf_stdout[BUFSIZ] = {0};
stdout_to_pipe();
LOG_INFO(msg_info, NO_ARG);
restore_stdout(buf_stdout, BUFSIZ);
if (strstr(buf_stdout, msg_info) != NULL) {
TEST_FAIL_MESSAGE("found message unexpectedly STDOUT");
}
fflush(stdout);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(test_trace);
RUN_TEST(test_debug);
RUN_TEST(test_info);
RUN_TEST(test_warn);
RUN_TEST(test_err0r);
RUN_TEST(test_fatal);
RUN_TEST(test_off);
RUN_TEST(test_in_logmodule);
RUN_TEST(test_in_logmodule2);
RUN_TEST(test_not_in_logmodule);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_malloc4k.c 0000664 0000000 0000000 00000005223 15000221702 0017542 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stddef.h>
#include <stdint.h>
#include "testsupport.h"
void setUp(void) {}
void tearDown(void) {}
// test 0-byte allocation
void malloc4k_zero() {
void *ptr = malloc_aligned_4k(0);
TEST_ASSERT_MESSAGE(
ptr == NULL,
"malloc_aligned_4k() returned non-zero for 0-byte allocation");
}
// test absolute hardware max + 1 byte allocation
// SIZE_MAX is 18446744073709551615UL (2^64 - 1), so SIZE_MAX + 1 wraps to 0
void malloc4k_size_max_plus_one() {
void *ptr = malloc_aligned_4k(SIZE_MAX + 1);
TEST_ASSERT_MESSAGE(
ptr == NULL,
"malloc_aligned_4k() returned non-zero SIZE_MAX+1 bytes allocation");
}
// test different happy-path allocation sizes and make sure the return address
// is on a 4k boundary
void malloc4k_check_boundary() {
#define PLUS_AND_MINUS 2
// 1K, 4K, 32K, 64K, 256K, 1M, 1G, 2G
// 5 allocations (-2, -1, +0, +1, +2) of each
unsigned int allocations[] = {1, 4, 32, 64,
256, 1024, 1024 * 1024, 2 * 1024 * 1024};
for (int i = 0; i < sizeof(allocations) / sizeof(allocations[0]); i++) {
for (size_t j = allocations[i] * 1024 - PLUS_AND_MINUS;
j <= allocations[i] * 1024 + PLUS_AND_MINUS; j++) {
void *ptr = malloc_aligned_4k(j);
LOG_DEBUG(
"malloc_aligned_4k() returned location = %016lx\n, size = %zu\n",
(uintptr_t)ptr, j);
TEST_ASSERT_MESSAGE_FORMATTED(
ptr,
"detected NULL return from malloc_aligned_4k(), size = %zu, "
"location = %016lx\n",
j, (uintptr_t)ptr);
TEST_ASSERT_MESSAGE_FORMATTED(
!((uintptr_t)ptr % AIU_PAGESIZE_IN_BYTES),
"detected non-4k aligned return from malloc_aligned_4k(), size = "
"%zu, "
"location = %016lx\n",
j, (uintptr_t)ptr);
free_aligned_4k(ptr);
}
}
TEST_PASS();
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(malloc4k_zero);
RUN_TEST(malloc4k_size_max_plus_one);
RUN_TEST(malloc4k_check_boundary);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_nnpa_parm_block.c 0000664 0000000 0000000 00000053132 15000221702 0021163 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
// Query block offsets
#define INSTALLED_FUNCTIONS_VECTOR_OFFSET 0
#define INSTALLED_PARAMETER_BLOCK_FORMATS_OFFSET 32
#define INSTALLED_DATA_TYPES_OFFSET 48
#define INSTALLED_DATA_LAYOUT_FORMATS_OFFSET 52
#define MAXIMUM_DIMENSION_INDEX_SIZE_OFFSET 60
#define MAXIMUM_TENSOR_SIZE_OFFSET 64
#define INSTALLED_DT1_CONVERSIONS_VECTOR_OFFSET 72
#define MAX_DIM4_INDEX_SIZE_OFFSET 88
#define MAX_DIM3_INDEX_SIZE_OFFSET 92
#define MAX_DIM2_INDEX_SIZE_OFFSET 96
#define MAX_DIM1_INDEX_SIZE_OFFSET 100
// Standard NNPA block offsets
#define PARM_BLOCK_VERSION_NUMBER_OFFSET 0
#define MODEL_VERSION_NUMBER_OFFSET 2
#define NNPA_RESERVED_FOR_IBM1 3
#define NNPA_RESERVED_FOR_IBM2 4
#define NNPA_RESERVED_FOR_IBM3 12
#define NNPA_RESERVED_FOR_IBM4 20
#define NNPA_RESERVED_FOR_IBM5 28
#define FUNC_SPECIFIC_SAVE_AREA_ADDR_OFFSET 56
#define OUTPUT_TENSOR_DESC_1_OFFSET 64
#define OUTPUT_TENSOR_DESC_2_OFFSET 96
#define INPUT_TENSOR_DESC_1_OFFSET 192
#define INPUT_TENSOR_DESC_2_OFFSET 224
#define INPUT_TENSOR_DESC_3_OFFSET 256
#define FUNCTION_SPECIFIC_PARMS 384
#define FUNCTION_SPECIFIC_PARM_1 384
#define FUNCTION_SPECIFIC_PARM_2 388
#define FUNCTION_SPECIFIC_PARM_3 392
#define FUNCTION_SPECIFIC_PARM_4 396
#define FUNCTION_SPECIFIC_PARM_5 400
#define FUNCTION_SPECIFIC_PARM_6 404
#define FUNCTION_SPECIFIC_PARM_7 408
#define FUNCTION_SPECIFIC_PARM_8 412
#define FUNCTION_SPECIFIC_PARM_9 416
#define FUNCTION_SPECIFIC_PARM_10 420
#define FUNCTION_SPECIFIC_PARM_11 424
#define FUNCTION_SPECIFIC_PARM_12 428
#define FUNCTION_SPECIFIC_PARM_13 432
#define FUNCTION_SPECIFIC_PARM_14 436
#define FUNCTION_SPECIFIC_PARM_15 440
#define FUNCTION_SPECIFIC_PARM_16 444
#define CSB_OFFSET 512
void setUp(void) {}
void tearDown(void) {}
/*
* Verify that the tensor descriptor was updated with the correct
 * information from the ztensor.
*/
void verify_populate_descriptor(nnpa_tensor_descriptor *descriptor,
zdnn_ztensor *ztensor) {
LOG_DEBUG("Verifying descriptor", NO_ARG);
TEST_ASSERT_EQUAL_UINT8_MESSAGE(ztensor->transformed_desc->format,
descriptor->data_layout_format,
"Incorrect data layout format.");
TEST_ASSERT_EQUAL_UINT32_MESSAGE(ztensor->transformed_desc->dim4,
descriptor->dim4_index_size,
"Incorrect dim4 index size");
TEST_ASSERT_EQUAL_UINT32_MESSAGE(ztensor->transformed_desc->dim3,
descriptor->dim3_index_size,
"Incorrect dim3 index size");
TEST_ASSERT_EQUAL_UINT32_MESSAGE(ztensor->transformed_desc->dim2,
descriptor->dim2_index_size,
"Incorrect dim2 index size");
TEST_ASSERT_EQUAL_UINT32_MESSAGE(ztensor->transformed_desc->dim1,
descriptor->dim1_index_size,
"Incorrect dim1 index size");
TEST_ASSERT_EQUAL_UINT64_MESSAGE(ztensor->buffer,
descriptor->tensor_data_addr,
"Incorrect tensor pointer");
}
/*
 * Common routine for driving all x-inputs y-outputs testcases.
 * The variadic parameters are the input dim arrays followed by the output
 * dim arrays; each dim array is in {outermost, ..., innermost} order.
*/
void populate_x_inputs_y_outputs(uint8_t num_inputs, uint8_t num_outputs,
zdnn_data_types type, ...) {
// Allocate and initialize our nnpa_parm_blocks
nnpa_parameter_block parm_block;
nnpa_parameter_block parm_block_all;
zdnn_ztensor input_ztensor[num_inputs], output_ztensor[num_outputs];
int dummy; // something for ztensor.buffer to point to
va_list ap;
va_start(ap, type);
// variadic: input dim arrays then output dim arrays
for (int i = 0; i < num_inputs; i++) {
uint32_t *dims = va_arg(ap, uint32_t *);
input_ztensor[i].transformed_desc = malloc(sizeof(zdnn_tensor_desc));
// dims[0] is the outermost dimension
init_transformed_desc(ZDNN_NHWC, type, ZDNN_FORMAT_4DFEATURE,
input_ztensor[i].transformed_desc, dims[0], dims[1],
dims[2], dims[3]);
input_ztensor[i].buffer = &dummy;
}
for (int i = 0; i < num_outputs; i++) {
uint32_t *dims = va_arg(ap, uint32_t *);
output_ztensor[i].transformed_desc = malloc(sizeof(zdnn_tensor_desc));
init_transformed_desc(ZDNN_NHWC, type, ZDNN_FORMAT_4DFEATURE,
output_ztensor[i].transformed_desc, dims[0], dims[1],
dims[2], dims[3]);
output_ztensor[i].buffer = &dummy;
}
va_end(ap);
function_specific_parameters fsp = {0};
populate_nnpa_parm_block(
&parm_block_all, NNPA_PARMBLKFORMAT_0, &input_ztensor[0],
(num_inputs > 1) ? &input_ztensor[1] : NULL,
(num_inputs > 2) ? &input_ztensor[2] : NULL, &output_ztensor[0],
(num_outputs > 1) ? &output_ztensor[1] : NULL, 0, &fsp);
// treat parm_block->input_tensor1/2/3 as if an array so we can loop them
nnpa_tensor_descriptor *block_input_ptr = &(parm_block.input_tensor1);
nnpa_tensor_descriptor *block_all_input_ptr = &(parm_block_all.input_tensor1);
for (int i = 0; i < num_inputs; i++) {
populate_descriptor(block_input_ptr + i, &input_ztensor[i]);
verify_populate_descriptor(block_all_input_ptr + i, &input_ztensor[i]);
verify_populate_descriptor(block_input_ptr + i, &input_ztensor[i]);
}
nnpa_tensor_descriptor *block_output_ptr = &(parm_block.output_tensor1);
nnpa_tensor_descriptor *block_all_output_ptr =
&(parm_block_all.output_tensor1);
for (int i = 0; i < num_outputs; i++) {
populate_descriptor(block_output_ptr + i, &output_ztensor[i]);
verify_populate_descriptor(block_all_output_ptr + i, &output_ztensor[i]);
verify_populate_descriptor(block_output_ptr + i, &output_ztensor[i]);
}
for (int i = 0; i < num_inputs; i++) {
free(input_ztensor[i].transformed_desc);
}
for (int i = 0; i < num_outputs; i++) {
free(output_ztensor[i].transformed_desc);
}
}
/*
* Test to ensure using either populate_descriptor or populate_all_descriptor
* updates the nnpa parm block appropriately for 1 input tensor
*/
void populate_single_input() {
uint32_t shape[ZDNN_MAX_DIMS] = {1, 1, 1, 3};
populate_x_inputs_y_outputs(1, 1, ZDNN_DLFLOAT16, shape, shape);
}
/*
* Test to ensure using either populate_descriptor or populate_all_descriptor
* updates the nnpa parm block appropriately for 1 input tensor and 2 output
* tensors
*/
void populate_single_input_double_output() {
uint32_t shape[ZDNN_MAX_DIMS] = {1, 1, 1, 3};
populate_x_inputs_y_outputs(1, 2, ZDNN_DLFLOAT16, shape, shape, shape);
}
/*
* Test to ensure using either populate_descriptor or populate_all_descriptor
* updates the nnpa parm block appropriately for 2 input tensors
*/
void populate_double_input() {
unsigned int input_dims[ZDNN_MAX_DIMS] = {4, 2, 1, 3};
unsigned int output_dims[ZDNN_MAX_DIMS] = {2, 1, 5, 2};
populate_x_inputs_y_outputs(2, 1, ZDNN_DLFLOAT16, input_dims, input_dims,
output_dims);
}
/*
* Test to ensure using either populate_descriptor or populate_all_descriptor
* updates the nnpa parm block appropriately for 3 input tensors
*/
void populate_triple_input() {
unsigned int input_dims[ZDNN_MAX_DIMS] = {5, 3, 1, 1};
unsigned int output_dims[ZDNN_MAX_DIMS] = {8, 1, 2, 4};
populate_x_inputs_y_outputs(3, 1, ZDNN_DLFLOAT16, input_dims, input_dims,
input_dims, output_dims);
}
/**
* Function to verify the offsets of each element in a nnpa_parameter_block
* struct.
*
* Parameter block offsets:
*
Bytes: Name:
0-1 PBVN
2 MVN
3 RIBM1
4-5 RIBM2
6-7 Reserved (1 bit latency flag and 1-bit Continuation Flag at end)
8-11 Reserved
12-15 RIBM3
16-19 Reserved
20-23 RIBM4
24-27 Reserved
28-31 RIBM5
32-55 Reserved
56-63 Function-specific-save-area-address
64-95 Output Tensor Descriptor 1
96-127 Output Tensor Descriptor 2
128-191 Reserved
192-223 Input Tensor Descriptor 1
224-255 Input Tensor Descriptor 2
256-287 Input Tensor Descriptor 3
288-383 Reserved
384-387 Function-specific-parameter-1
388-391 Function-specific-parameter-2
392-395 Function-specific-parameter-3
396-399 Function-specific-parameter-4
400-403 Function-specific-parameter-5
404-407 Function-specific-parameter-6
408-411 Function-specific-parameter-7
412-415 Function-specific-parameter-8
416-419 Function-specific-parameter-9
420-423 Function-specific-parameter-10
424-427 Function-specific-parameter-11
428-431 Function-specific-parameter-12
432-435 Function-specific-parameter-13
436-439 Function-specific-parameter-14
440-443 Function-specific-parameter-15
444-447 Function-specific-parameter-16
448-511 Reserved
512-4088 CSB
*/
void verify_parm_block_offsets() {
TEST_ASSERT_EQUAL_MESSAGE(
PARM_BLOCK_VERSION_NUMBER_OFFSET,
offsetof(nnpa_parameter_block, parm_block_version_number),
"parm_block_version in nnpa_parameter_block has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MODEL_VERSION_NUMBER_OFFSET,
offsetof(nnpa_parameter_block, model_version_number),
"model_version in nnpa_parameter_block has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
NNPA_RESERVED_FOR_IBM1, offsetof(nnpa_parameter_block, reserved_for_ibm1),
"reserved for IBM in nnpa_parameter_block has incorrect offset");
  /* Skipping as this member has been moved into a bitfield, for which an
   * offset can't be determined.
TEST_ASSERT_EQUAL_MESSAGE(
NNPA_RESERVED_FOR_IBM2, offsetof(nnpa_parameter_block, reserved_for_ibm2),
"reserved for IBM 2 in nnpa_parameter_block has incorrect offset");
*/
TEST_ASSERT_EQUAL_MESSAGE(
NNPA_RESERVED_FOR_IBM3, offsetof(nnpa_parameter_block, reserved_for_ibm3),
"reserved for IBM 3 in nnpa_parameter_block has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
NNPA_RESERVED_FOR_IBM4, offsetof(nnpa_parameter_block, reserved_for_ibm4),
"reserved for IBM 4 in nnpa_parameter_block has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
NNPA_RESERVED_FOR_IBM5, offsetof(nnpa_parameter_block, reserved_for_ibm5),
"reserved for IBM 5 in nnpa_parameter_block has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNC_SPECIFIC_SAVE_AREA_ADDR_OFFSET,
offsetof(nnpa_parameter_block, function_specific_save_area_address),
"function_specific_save_area_address in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(OUTPUT_TENSOR_DESC_1_OFFSET,
offsetof(nnpa_parameter_block, output_tensor1),
"output_tensor1 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(OUTPUT_TENSOR_DESC_2_OFFSET,
offsetof(nnpa_parameter_block, output_tensor2),
"output_tensor2 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(INPUT_TENSOR_DESC_1_OFFSET,
offsetof(nnpa_parameter_block, input_tensor1),
"input_tensor1 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(INPUT_TENSOR_DESC_2_OFFSET,
offsetof(nnpa_parameter_block, input_tensor2),
"input_tensor2 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(INPUT_TENSOR_DESC_3_OFFSET,
offsetof(nnpa_parameter_block, input_tensor3),
"input_tensor3 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARMS,
offsetof(nnpa_parameter_block, function_specific_parms),
"function_specific_parms in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_1,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm1)),
"function_specific_parm1 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_2,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm2)),
"function_specific_parm2 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_3,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm3)),
"function_specific_parm3 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_4,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm4)),
"function_specific_parm4 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_5,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm5)),
"function_specific_parm5 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_6,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm6)),
"function_specific_parm6 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_7,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm7)),
"function_specific_parm7 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_8,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm8)),
"function_specific_parm8 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_9,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm9)),
"function_specific_parm9 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_10,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm10)),
"function_specific_parm10 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_11,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm11)),
"function_specific_parm11 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_12,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm12)),
"function_specific_parm12 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_13,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm13)),
"function_specific_parm13 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_14,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm14)),
"function_specific_parm14 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_15,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm15)),
"function_specific_parm15 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
FUNCTION_SPECIFIC_PARM_16,
(offsetof(nnpa_parameter_block, function_specific_parms) +
offsetof(function_specific_parameters, function_specific_parm16)),
"function_specific_parm16 in nnpa_parameter_block has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
CSB_OFFSET, offsetof(nnpa_parameter_block, continuation_state_buffer),
"continuation_state_buffer in nnpa_parameter_block has "
"incorrect offset");
}
/**
* Function to verify the offsets of each element in a
* aiu_parameter_block_nnpa_qaf struct.
*
* Parameter block offsets:
*
Bytes: Name:
0-31 installed_functions_vector;
32-47 installed_parameter_block_formats;
48-49 installed_data_types;
50-51 reserved1[2]
52-55 installed_data_layout_formats;
56-59 reserved2[4];
60-63 maximum_dimension_index_size;
64-71 maximum_tensor_size;
72-73 installed_dt1_conversions_vector
74-87 reserved3[14];
88-91 max_dim4_index_size
92-95 max_dim3_index_size
96-99 max_dim2_index_size
100-103 max_dim1_index_size
104-256 reserved4[152]
*/
void verify_qaf_parm_block_offsets() {
TEST_ASSERT_EQUAL_MESSAGE(
INSTALLED_FUNCTIONS_VECTOR_OFFSET,
offsetof(nnpa_qaf_parameter_block, installed_functions_vector),
"installed_functions_vector in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
INSTALLED_PARAMETER_BLOCK_FORMATS_OFFSET,
offsetof(nnpa_qaf_parameter_block, installed_parameter_block_formats),
"reserved1 in aiu_parameter_block_nnpa_qaf has incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
INSTALLED_DATA_TYPES_OFFSET,
offsetof(nnpa_qaf_parameter_block, installed_data_types),
"installed_data_type in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
INSTALLED_DATA_LAYOUT_FORMATS_OFFSET,
offsetof(nnpa_qaf_parameter_block, installed_data_layout_formats),
"installed_data_layout_formats in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAXIMUM_DIMENSION_INDEX_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, maximum_dimension_index_size),
"maximum_dimension_index_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAXIMUM_TENSOR_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, maximum_tensor_size),
"maximum_tensor_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
INSTALLED_DT1_CONVERSIONS_VECTOR_OFFSET,
offsetof(nnpa_qaf_parameter_block, installed_dt1_conversions_vector),
"installed_dt1_conversions_vector in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAX_DIM4_INDEX_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, max_dim4_index_size),
"max_dim4_index_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAX_DIM3_INDEX_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, max_dim3_index_size),
"max_dim3_index_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAX_DIM2_INDEX_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, max_dim2_index_size),
"max_dim2_index_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
TEST_ASSERT_EQUAL_MESSAGE(
MAX_DIM1_INDEX_SIZE_OFFSET,
offsetof(nnpa_qaf_parameter_block, max_dim1_index_size),
"max_dim1_index_size in aiu_parameter_block_nnpa_qaf has "
"incorrect offset");
}
void verify_parm_block_size() {
TEST_ASSERT_EQUAL_MESSAGE(NNPA_PARMBLOCK_SIZE, sizeof(nnpa_parameter_block),
"size of nnpa_paramater_block is incorrect");
}
void verify_qaf_parm_block_size() {
TEST_ASSERT_EQUAL_MESSAGE(QAF_PARMBLOCK_SIZE,
sizeof(nnpa_qaf_parameter_block),
"size of nnpa_qaf_paramater_block is incorrect");
}
int main() {
UNITY_BEGIN();
RUN_TEST(populate_single_input);
RUN_TEST(populate_single_input_double_output);
RUN_TEST(populate_double_input);
RUN_TEST(populate_triple_input);
RUN_TEST(verify_parm_block_offsets);
RUN_TEST(verify_qaf_parm_block_offsets);
RUN_TEST(verify_parm_block_size);
RUN_TEST(verify_qaf_parm_block_size);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_precheck.c 0000664 0000000 0000000 00000024306 15000221702 0017623 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
zdnn_ztensor ztensor_input1, ztensor_input2, ztensor_input3, ztensor_output1,
ztensor_output2;
zdnn_tensor_desc pre_tfrmd_desc, input1_tfrmd_desc, input2_tfrmd_desc,
input3_tfrmd_desc, output1_tfrmd_desc, output2_tfrmd_desc;
void create_garbage_tensors();
void setUp(void) { create_garbage_tensors(); }
void tearDown(void) {}
/*********************************************************************
* The goal is to verify if the verifier routine is invoked when
* precheck_enabled = true, not if the verifier routine returns the
* correct status code (which is testDriver_tensor_verify*.c's job).
*
* On an environment equipped with zAIU, all testcases will cause program
* termination due to DXG rather than issuing a non-ZDNN_OK status.
* *******************************************************************/
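/*
* Rough sketch of the control flow being exercised (illustrative only, not
* the actual library source): with precheck_enabled = true, each op is
* expected to run its verifier before driving the zAIU, e.g.
*
*   if (precheck_enabled) {
*     status = verify_tensors(...);         // hypothetical verifier call
*     if (status != ZDNN_OK) return status; // zAIU op is never issued
*   }
*
* The testcases below therefore only assert on the returned status.
*/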
/// Create garbage input/output tensors that are guaranteed to fail any zAIU op
void create_garbage_tensors() {
precheck_enabled = true;
uint32_t dim4 = 1, dim3 = 1, dim2 = 1, dim1 = 1;
zdnn_data_layouts layout = ZDNN_NHWC;
zdnn_data_types type = FP16;
zdnn_init_pre_transformed_desc(layout, type, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
// all inputs and outputs same shape
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &input1_tfrmd_desc);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &input2_tfrmd_desc);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &input3_tfrmd_desc);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &output1_tfrmd_desc);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &output2_tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &input1_tfrmd_desc,
&ztensor_input1);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &input2_tfrmd_desc,
&ztensor_input2);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &input3_tfrmd_desc,
&ztensor_input3);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &output1_tfrmd_desc,
&ztensor_output1);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &output2_tfrmd_desc,
&ztensor_output2);
// all input tensors are features, all output tensors are kernels.
ztensor_output1.transformed_desc->format = ZDNN_FORMAT_4DKERNEL;
ztensor_output2.transformed_desc->format = ZDNN_FORMAT_4DKERNEL;
}
void bad_element_wise() {
zdnn_status status =
zdnn_add(&ztensor_input1, &ztensor_input2, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(status != ZDNN_OK,
"Expected failure status but got %d \"%s\"",
status, zdnn_get_status_message(status));
}
void bad_batchnorm() {
zdnn_status status = zdnn_batchnorm(&ztensor_input1, &ztensor_input2,
&ztensor_input3, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(status != ZDNN_OK,
"Expected failure status but got %d \"%s\"",
status, zdnn_get_status_message(status));
}
void bad_lstm() {
// ZDNN_INVALID_SHAPE because all dims are 1s
zdnn_status exp_status = ZDNN_INVALID_SHAPE;
zdnn_status status =
zdnn_lstm(&ztensor_input1, &ztensor_input2, &ztensor_input3,
&ztensor_input1, &ztensor_input2, &ztensor_input3,
&ztensor_input1, FWD, NULL, &ztensor_output1, &ztensor_output2);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
void bad_matmul_op_with_bias_addition() {
zdnn_status status =
zdnn_matmul_op(&ztensor_input1, &ztensor_input2, &ztensor_input3,
MATMUL_OP_ADDITION, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(status != ZDNN_OK,
"Expected failure status but got %d \"%s\"",
status, zdnn_get_status_message(status));
}
void bad_matmul_bcast_op_with_bias_addition() {
zdnn_status status =
zdnn_matmul_bcast_op(&ztensor_input1, &ztensor_input2, &ztensor_input3,
MATMUL_BCAST_OP_ADDITION, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(status != ZDNN_OK,
"Expected failure status but got %d \"%s\"",
status, zdnn_get_status_message(status));
}
void bad_pool() {
zdnn_status status =
zdnn_avgpool2d(&ztensor_input1, 1, 1, 1, 1, 1, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(status != ZDNN_OK,
"Expected failure status but got %d \"%s\"",
status, zdnn_get_status_message(status));
}
void negative_relu_clipping() {
VERIFY_HW_ENV; // zdnn_relu drives HW conversion before precheck
ztensor_output1.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
float clip_value = -1;
zdnn_status status =
zdnn_relu(&ztensor_input1, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
void nan_relu_clipping() {
VERIFY_HW_ENV; // zdnn_relu drives HW conversion before precheck
ztensor_output1.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
uint32_t clip_value = 0x7FFFFFFF; // an FP32 NaN bit pattern
zdnn_status status =
zdnn_relu(&ztensor_input1, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
void negative_nan_relu_clipping() {
VERIFY_HW_ENV; // zdnn_relu drives HW conversion before precheck
ztensor_output1.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
uint32_t clip_value = 0xFFFFFFFF; // an FP32 negative NaN bit pattern
zdnn_status status =
zdnn_relu(&ztensor_input1, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
// Make all tensor and other values correct.
void setup_conv2d_tensors() {
ztensor_output1.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
ztensor_input1.transformed_desc->dim4 = 1;
ztensor_input1.transformed_desc->dim3 = 4;
ztensor_input1.transformed_desc->dim2 = 3;
ztensor_input1.transformed_desc->dim1 = 5;
ztensor_input2.transformed_desc->dim4 = 2;
ztensor_input2.transformed_desc->dim3 = 2;
ztensor_input2.transformed_desc->dim2 = 5;
ztensor_input2.transformed_desc->dim1 = 2;
ztensor_input3.transformed_desc->dim4 = 1;
ztensor_input3.transformed_desc->dim3 = 1;
ztensor_input3.transformed_desc->dim2 = 1;
ztensor_input3.transformed_desc->dim1 = 2;
ztensor_output1.transformed_desc->dim4 = 1;
ztensor_output1.transformed_desc->dim3 = 3;
ztensor_output1.transformed_desc->dim2 = 2;
ztensor_output1.transformed_desc->dim1 = 2;
}
void negative_conv2d_clipping() {
VERIFY_HW_ENV; // zdnn_conv2d drives HW conversion before precheck
setup_conv2d_tensors();
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
float clip_value = -1;
zdnn_status status = zdnn_conv2d(
&ztensor_input1, &ztensor_input2, &ztensor_input3, VALID_PADDING, 1, 1,
CONV2D_ACT_RELU, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
void nan_conv2d_clipping() {
VERIFY_HW_ENV; // zdnn_conv2d drives HW conversion before precheck
setup_conv2d_tensors();
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
uint32_t clip_value = 0x7FFFFFFF;
zdnn_status status = zdnn_conv2d(
&ztensor_input1, &ztensor_input2, &ztensor_input3, VALID_PADDING, 1, 1,
CONV2D_ACT_RELU, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
void negative_nan_conv2d_clipping() {
VERIFY_HW_ENV; // zdnn_conv2d drives HW conversion before precheck
setup_conv2d_tensors();
zdnn_status exp_status = ZDNN_INVALID_CLIPPING_VALUE;
uint32_t clip_value = 0xFFFFFFFF;
zdnn_status status = zdnn_conv2d(
&ztensor_input1, &ztensor_input2, &ztensor_input3, VALID_PADDING, 1, 1,
CONV2D_ACT_RELU, (void *)&clip_value, &ztensor_output1);
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status, "Got status %d \"%s\" but expected %d \"%s\"",
status, zdnn_get_status_message(status), exp_status,
(zdnn_get_status_message(exp_status)));
}
int main() {
UNITY_BEGIN();
RUN_TEST(bad_element_wise);
RUN_TEST(bad_batchnorm);
RUN_TEST(bad_lstm);
RUN_TEST(bad_matmul_op_with_bias_addition);
RUN_TEST(bad_matmul_bcast_op_with_bias_addition);
RUN_TEST(bad_pool);
RUN_TEST(negative_relu_clipping);
RUN_TEST(nan_relu_clipping);
RUN_TEST(negative_nan_relu_clipping);
RUN_TEST(negative_conv2d_clipping);
RUN_TEST(nan_conv2d_clipping);
RUN_TEST(negative_nan_conv2d_clipping);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_query.c 0000664 0000000 0000000 00000016274 15000221702 0017211 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <inttypes.h>
#include <stdio.h>
#define NNPA_OP_FAKE 255
#define NNPA_PARMBLKFORMAT_FAKE 127
#define QUERY_DATATYPE_FAKE (1 << 0)
#define QUERY_LAYOUTFMT_FAKE (10 << 0)
#define QUERY_BFPFMT_FAKE (1 << 0)
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
void test_function_available() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_function_installed(3, NNPA_ADD, NNPA_BATCHNORMALIZATION,
NNPA_SOFTMAX) == true,
"One or more of the requested functions is not detected as available");
}
void test_function_not_available() {
TEST_ASSERT_MESSAGE(zdnn_is_nnpa_function_installed(3, NNPA_ADD,
NNPA_BATCHNORMALIZATION,
NNPA_OP_FAKE) == false,
"NNPA_OP_FAKE is not detected as unavailable");
}
void test_parm_blk_fmt_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_parmblk_fmt_installed(1, NNPA_PARMBLKFORMAT_0) == true,
"NNPA_PARMBLKFORMAT_TENSORDESC is not detected as available");
}
void test_parm_blk_fmt_not_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_parmblk_fmt_installed(2, NNPA_PARMBLKFORMAT_FAKE,
NNPA_PARMBLKFORMAT_0) == false,
"NNPA_PARMBLKFORMAT_FAKE is not detected as unavailable");
}
void test_datatype_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_datatype_installed(QUERY_DATATYPE_INTERNAL1) == true,
"NNPA_QAF_DATATYPE_INTERNAL1 is not detected as available");
}
void test_datatype_not_installed() {
TEST_ASSERT_MESSAGE(zdnn_is_nnpa_datatype_installed(QUERY_DATATYPE_INTERNAL1 |
QUERY_DATATYPE_FAKE) ==
false,
"QUERY_DATATYPE_FAKE is not detected as unavailable");
}
void test_datalayout_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_layout_fmt_installed(QUERY_LAYOUTFMT_4DFEATURE |
QUERY_LAYOUTFMT_4DKERNEL) == true,
"NNPA_QAF_DATALAYOUT_4DFEATURETENSOR is not detected as available");
}
void test_datalayout_not_installed() {
TEST_ASSERT_MESSAGE(zdnn_is_nnpa_layout_fmt_installed(
QUERY_LAYOUTFMT_4DFEATURE | QUERY_LAYOUTFMT_4DKERNEL |
QUERY_LAYOUTFMT_FAKE) == false,
"QUERY_LAYOUTFMT_FAKE is not detected as unavailable");
}
void test_datatype_conversion_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_conversion_installed(
NNPA_DATATYPE_1, QUERY_BFPFMT_TINY | QUERY_BFPFMT_SHORT) == true,
"QUERY_BFPFMT_TINY | QUERY_BFPFMT_SHORT is not detected as available");
}
void test_datatype_conversion_not_installed() {
TEST_ASSERT_MESSAGE(
zdnn_is_nnpa_conversion_installed(NNPA_DATATYPE_1,
QUERY_BFPFMT_TINY | QUERY_BFPFMT_SHORT |
QUERY_BFPFMT_FAKE) == false,
"QUERY_BFPFMT_FAKE is not detected as unavailable");
}
// Values from AR11010-12
#define MAXIMUM_DIMENSION_INDEX_SIZE ((uint32_t)1 << 15) // 32768
#define MAX_DIM4_INDEX_SIZE ((uint32_t)1 << 15) // 32768
#define MAX_DIM3_INDEX_SIZE ((uint32_t)1 << 15) // 32768
#define MAX_DIM2_INDEX_SIZE ((uint32_t)1 << 20) // 1048576
#define MAX_DIM1_INDEX_SIZE ((uint32_t)1 << 21) // 2097152
#define MAXIMUM_TENSOR_SIZE ((uint64_t)1 << 32) // 4294967296
void test_get_max_dim_idx_size() {
TEST_ASSERT_MESSAGE_FORMATTED(
zdnn_get_nnpa_max_dim_idx_size() == MAXIMUM_DIMENSION_INDEX_SIZE,
"zdnn_get_nnpa_max_dim_idx_size() %u did not return %u",
zdnn_get_nnpa_max_dim_idx_size(), MAXIMUM_DIMENSION_INDEX_SIZE);
}
void test_get_max_dim4_idx_size() {
uint32_t expected_index_size = MAX_DIM4_INDEX_SIZE;
TEST_ASSERT_MESSAGE_FORMATTED(zdnn_get_max_for_dim(4) == expected_index_size,
"zdnn_get_max_for_dim() %u did not return %u",
zdnn_get_max_for_dim(4), expected_index_size);
}
void test_get_max_dim3_idx_size() {
uint32_t expected_index_size = MAX_DIM3_INDEX_SIZE;
TEST_ASSERT_MESSAGE_FORMATTED(zdnn_get_max_for_dim(3) == expected_index_size,
"zdnn_get_max_for_dim(3) %u did not return %u",
zdnn_get_max_for_dim(3), expected_index_size);
}
void test_get_max_dim2_idx_size() {
uint32_t expected_index_size = nnpa_query_result.max_dim2_index_size
? MAX_DIM2_INDEX_SIZE
: MAXIMUM_DIMENSION_INDEX_SIZE;
TEST_ASSERT_MESSAGE_FORMATTED(zdnn_get_max_for_dim(2) == expected_index_size,
"zdnn_get_max_for_dim(2) %u did not return %u",
zdnn_get_max_for_dim(2), expected_index_size);
}
void test_get_max_dim1_idx_size() {
uint32_t expected_index_size = nnpa_query_result.max_dim1_index_size
? MAX_DIM1_INDEX_SIZE
: MAXIMUM_DIMENSION_INDEX_SIZE;
TEST_ASSERT_MESSAGE_FORMATTED(zdnn_get_max_for_dim(1) == expected_index_size,
"zdnn_get_max_for_dim(1) %u did not return %u",
zdnn_get_max_for_dim(1), expected_index_size);
}
void test_get_max_tensor_size() {
TEST_ASSERT_MESSAGE_FORMATTED(
zdnn_get_nnpa_max_tensor_size() == MAXIMUM_TENSOR_SIZE,
"zdnn_get_nnpa_max_tensor_size() %" PRIu64 " did not return %" PRIu64,
zdnn_get_nnpa_max_tensor_size(), MAXIMUM_TENSOR_SIZE);
}
// eyeball inspection
void test_print_version() {
printf("version = %04x\n", zdnn_get_library_version());
printf("version string = %s\n", zdnn_get_library_version_str());
}
// ------------------------------------------------------------------------------------------------
int main(void) {
UNITY_BEGIN();
RUN_TEST(test_function_available);
RUN_TEST(test_function_not_available);
RUN_TEST(test_parm_blk_fmt_installed);
RUN_TEST(test_parm_blk_fmt_not_installed);
RUN_TEST(test_datatype_installed);
RUN_TEST(test_datatype_not_installed);
RUN_TEST(test_datalayout_installed);
RUN_TEST(test_datalayout_not_installed);
RUN_TEST(test_datatype_conversion_installed);
RUN_TEST(test_datatype_conversion_not_installed);
RUN_TEST(test_get_max_dim_idx_size);
RUN_TEST(test_get_max_dim4_idx_size);
RUN_TEST(test_get_max_dim3_idx_size);
RUN_TEST(test_get_max_dim2_idx_size);
RUN_TEST(test_get_max_dim1_idx_size);
RUN_TEST(test_get_max_tensor_size);
RUN_TEST(test_print_version);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_reshape_ztensor.c 0000664 0000000 0000000 00000025425 15000221702 0021255 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "testsupport.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Non-error scenario general strategy:
*
* Create 2 tensors:
* tensor A: shape (x, y, z, a)
* tensor B: shape (i, j, k, b)
* where (x * y * z * a) == (i * j * k * b)
*
* Create raw data of (x * y * z * a) elements
*
* Stickify raw data to tensor A's buffer
* zdnn_reshape_ztensor() from tensor A to tensor B
*
* Compare tensor B's buffer to the raw data, element by element, using
* get_stick_offset() with respect to tensor B's shape
*
* Compare by values due to precision loss:
* A goes from FP16/FP32/BFLOAT -> DLFLOAT16, meaning
* B goes from FP16/FP32/BFLOAT -> DLFLOAT16 -> FP32 -> DLFLOAT16
*/
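/*
* A concrete instance of the constraint above: reshaping (1, 2, 3, 4) to
* (6, 1, 1, 4) is valid because both shapes hold 1*2*3*4 = 6*1*1*4 = 24
* elements; test_fail_total_elements_mismatch() below drives the mismatch
* case (4x4x4x4 = 256 vs. 1x1x16x15 = 240 elements) and expects
* ZDNN_INVALID_SHAPE.
*/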
void test(zdnn_data_layouts src_layout, uint32_t src_dim4, uint32_t src_dim3,
uint32_t src_dim2, uint32_t src_dim1, zdnn_data_layouts dest_layout,
uint32_t dest_dim4, uint32_t dest_dim3, uint32_t dest_dim2,
uint32_t dest_dim1, zdnn_status exp_status) {
zdnn_status status;
zdnn_tensor_desc src_pre_tfrmd_desc, dest_pre_tfrmd_desc, src_tfrmd_desc,
dest_tfrmd_desc;
zdnn_ztensor src_ztensor, dest_ztensor;
zdnn_init_pre_transformed_desc(src_layout, test_datatype, &src_pre_tfrmd_desc,
src_dim4, src_dim3, src_dim2, src_dim1);
zdnn_init_pre_transformed_desc(dest_layout, test_datatype,
&dest_pre_tfrmd_desc, dest_dim4, dest_dim3,
dest_dim2, dest_dim1);
zdnn_generate_transformed_desc(&src_pre_tfrmd_desc, &src_tfrmd_desc);
zdnn_generate_transformed_desc(&dest_pre_tfrmd_desc, &dest_tfrmd_desc);
status = zdnn_init_ztensor_with_malloc(&src_pre_tfrmd_desc, &src_tfrmd_desc,
&src_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() (src) failed, status = %08x", status);
void *raw_data = create_and_fill_random_fp_data(&src_ztensor);
status = zdnn_init_ztensor_with_malloc(&dest_pre_tfrmd_desc, &dest_tfrmd_desc,
&dest_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() (dest) failed, status = %08x", status);
status = zdnn_transform_ztensor(&src_ztensor, raw_data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor() failed, status = %08x",
status);
status = zdnn_reshape_ztensor(&src_ztensor, &dest_ztensor);
if (exp_status == ZDNN_OK) {
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_reshape_ztensor() failed, status = %08x",
status);
TEST_ASSERT_MESSAGE(dest_ztensor.is_transformed == true,
"zdnn_reshape_ztensor() was successful but "
"did not set is_transformed properly for "
"destination ztensor");
uint64_t raw_offset = 0;
uint64_t cnt = 0;
for (uint32_t i = 0; i < dest_dim4; i++) {
for (uint32_t j = 0; j < dest_dim3; j++) {
for (uint32_t k = 0; k < dest_dim2; k++) {
for (uint32_t b = 0; b < dest_dim1; b++) {
uint64_t dest_offset =
get_stick_offset(i, j, k, b, &dest_tfrmd_desc);
uint16_t raw_dlf16_val = 0; // this is the "expected" value
uint16_t dest_dlf16_val =
*(uint16_t *)((uintptr_t)dest_ztensor.buffer + dest_offset);
// these 2 are for printf-ing only
float raw_float_val = 0;
float dest_float_val = cnvt_1_dlf16_to_fp32(dest_dlf16_val);
if (test_datatype == BFLOAT) {
raw_float_val = cnvt_1_bfloat_to_fp32(
*(uint16_t *)((uintptr_t)raw_data + raw_offset));
raw_dlf16_val = cnvt_1_bfloat_to_dlf16(
*(uint16_t *)((uintptr_t)raw_data + raw_offset));
} else if (test_datatype == FP16) {
raw_float_val = cnvt_1_fp16_to_fp32(
*(uint16_t *)((uintptr_t)raw_data + raw_offset));
raw_dlf16_val = cnvt_1_fp16_to_dlf16(
*(uint16_t *)((uintptr_t)raw_data + raw_offset));
} else if (test_datatype == FP32) {
raw_float_val = *(float *)((uintptr_t)raw_data + raw_offset);
raw_dlf16_val = cnvt_1_fp32_to_dlf16(raw_float_val);
}
TEST_ASSERT_MESSAGE_FORMATTED(
almost_equal_dlf16(dest_dlf16_val, raw_dlf16_val),
"Incorrect value at element %" PRIu64
": Expected: %.6f, Found (offset %" PRIu64 "): %.6f",
cnt, raw_float_val, dest_offset, dest_float_val);
raw_offset += get_data_type_size(test_datatype);
cnt++;
}
}
}
}
} else {
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"expected status = %08x, got status = %08x",
exp_status, status);
TEST_ASSERT_MESSAGE(
dest_ztensor.is_transformed == false,
"zdnn_reshape_ztensor() failed but set is_transformed improperly for "
"destination ztensor.");
}
free(raw_data);
}
// N/H/W/C all the same (memcpy whole buffer)
void test_4x5x6x7_4x5x6x7() {
test(ZDNN_NHWC, 4, 5, 6, 7, ZDNN_NHWC, 4, 5, 6, 7, ZDNN_OK);
}
// same C, different N/H/W (sticks memcpy)
void test_1x2x3x4_6x1x1x4() {
test(ZDNN_NHWC, 1, 2, 3, 4, ZDNN_NHWC, 6, 1, 1, 4, ZDNN_OK);
}
// same C, different N/H/W, more elements (sticks memcpy)
void test_2x3x4x68_4x1x6x68() {
test(ZDNN_NHWC, 2, 3, 4, 68, ZDNN_NHWC, 4, 1, 6, 68, ZDNN_OK);
}
// same C, different N/H/W, even more elements (sticks memcpy)
void test_4x3x40x70_8x20x3x70() {
test(ZDNN_NHWC, 4, 3, 40, 70, ZDNN_NHWC, 8, 20, 3, 70, ZDNN_OK);
}
// N/H/W/C all different
void test_4x4x4x4_1x1x16x16() {
test(ZDNN_NHWC, 4, 4, 4, 4, ZDNN_NHWC, 1, 1, 16, 16, ZDNN_OK);
}
void test_fail_total_elements_mismatch() {
test(ZDNN_NHWC, 4, 4, 4, 4, ZDNN_NHWC, 1, 1, 16, 15, ZDNN_INVALID_SHAPE);
}
void test_fail_not_nhwc_nor_hwck() {
zdnn_status status, exp_status = ZDNN_INVALID_LAYOUT;
zdnn_tensor_desc src_pre_tfrmd_desc, dest_pre_tfrmd_desc, src_tfrmd_desc,
dest_tfrmd_desc;
zdnn_ztensor src_ztensor, dest_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &src_pre_tfrmd_desc, 4, 4, 4,
4);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &dest_pre_tfrmd_desc, 4, 4, 4,
4);
zdnn_generate_transformed_desc(&src_pre_tfrmd_desc, &src_tfrmd_desc);
zdnn_generate_transformed_desc(&dest_pre_tfrmd_desc, &dest_tfrmd_desc);
zdnn_init_ztensor(&src_pre_tfrmd_desc, &src_tfrmd_desc, &src_ztensor);
zdnn_init_ztensor(&dest_pre_tfrmd_desc, &dest_tfrmd_desc, &dest_ztensor);
src_ztensor.is_transformed = true;
// sabotage the layouts
src_tfrmd_desc.layout = ZDNN_NCHW;
dest_tfrmd_desc.layout = ZDNN_NCHW;
status = zdnn_reshape_ztensor(&src_ztensor, &dest_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"expected status = %08x, got status = %08x",
exp_status, status);
}
void test_fail_not_same_layout() {
test_datatype = FP16;
test(ZDNN_NHWC, 4, 5, 6, 7, ZDNN_HWCK, 4, 5, 6, 7, ZDNN_INVALID_LAYOUT);
}
void test_fail_src_not_transformed() {
zdnn_status status, exp_status = ZDNN_INVALID_STATE;
test_datatype = FP16;
zdnn_tensor_desc src_pre_tfrmd_desc, dest_pre_tfrmd_desc, src_tfrmd_desc,
dest_tfrmd_desc;
zdnn_ztensor src_ztensor, dest_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &src_pre_tfrmd_desc,
4, 4, 4, 4);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &dest_pre_tfrmd_desc,
4, 4, 4, 4);
zdnn_generate_transformed_desc(&src_pre_tfrmd_desc, &src_tfrmd_desc);
zdnn_generate_transformed_desc(&dest_pre_tfrmd_desc, &dest_tfrmd_desc);
zdnn_init_ztensor(&src_pre_tfrmd_desc, &src_tfrmd_desc, &src_ztensor);
zdnn_init_ztensor(&dest_pre_tfrmd_desc, &dest_tfrmd_desc, &dest_ztensor);
// src_ztensor is NOT transformed at this point
status = zdnn_reshape_ztensor(&src_ztensor, &dest_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"expected status = %08x, got status = %08x",
exp_status, status);
}
void test_fail_dest_already_transformed() {
zdnn_status status, exp_status = ZDNN_INVALID_STATE;
test_datatype = FP16;
zdnn_tensor_desc src_pre_tfrmd_desc, dest_pre_tfrmd_desc, src_tfrmd_desc,
dest_tfrmd_desc;
zdnn_ztensor src_ztensor, dest_ztensor;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &src_pre_tfrmd_desc,
4, 4, 4, 4);
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &dest_pre_tfrmd_desc,
4, 4, 4, 4);
zdnn_generate_transformed_desc(&src_pre_tfrmd_desc, &src_tfrmd_desc);
zdnn_generate_transformed_desc(&dest_pre_tfrmd_desc, &dest_tfrmd_desc);
zdnn_init_ztensor(&src_pre_tfrmd_desc, &src_tfrmd_desc, &src_ztensor);
zdnn_init_ztensor(&dest_pre_tfrmd_desc, &dest_tfrmd_desc, &dest_ztensor);
src_ztensor.is_transformed = true;
// sabotage dest_ztensor
dest_ztensor.is_transformed = true;
status = zdnn_reshape_ztensor(&src_ztensor, &dest_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"expected status = %08x, got status = %08x",
exp_status, status);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_4x5x6x7_4x5x6x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_1x2x3x4_6x1x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2x3x4x68_4x1x6x68);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_4x3x40x70_8x20x3x70);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_4x4x4x4_1x1x16x16);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_fail_total_elements_mismatch);
RUN_TEST(test_fail_not_nhwc_nor_hwck);
RUN_TEST(test_fail_not_same_layout);
RUN_TEST(test_fail_src_not_transformed);
RUN_TEST(test_fail_dest_already_transformed);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_status_diag.c 0000664 0000000 0000000 00000005351 15000221702 0020345 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/*********************************************************************
* This testcase only works for LoZ, as there's no way to easily
* verify ctrace()'s output under z/OS. The intent of this testcase
* is to verify if the status_diag code gets invoked when we want it
* to, not as much as if it's producing the correct output.
* *******************************************************************/
void setUp(void) {
#ifdef __MVS__
TEST_IGNORE_MESSAGE("No easy way to verify ctrace on z/OS. Skipping.");
#endif // __MVS__
}
void tearDown(void) {}
void try_diag(uint32_t status_to_diag, uint32_t status_to_set,
bool expect_backtrace) {
status_diag = status_to_diag;
char buf_stdout[BUFSIZ] = {0};
stdout_to_pipe();
set_zdnn_status(status_to_set, __func__, __FILE__, __LINE__,
"this is a test");
restore_stdout(buf_stdout, BUFSIZ);
/*
the backtrace should have something like:
obj/../../aiu/libzdnn.so.1(set_zdnn_status+0x1d4)[0x3ffb750a19c]
./obj/testDriver_status_diag.out() [0x1001a2c]
./obj/testDriver_status_diag.out() [0x1001ade]
./obj/testDriver_status_diag.out() [0x1005012]
./obj/testDriver_status_diag.out() [0x1001baa]
so search for "libzdnn" in the captured STDOUT output
*/
if (expect_backtrace) {
TEST_ASSERT_MESSAGE(strstr(buf_stdout, "libzdnn") != NULL,
"Can't find backtrace in buf_stdout");
} else {
TEST_ASSERT_MESSAGE(strstr(buf_stdout, "libzdnn") == NULL,
"Backtrace unexpectedly appears");
}
}
void test_real_error() {
try_diag(ZDNN_INVALID_SHAPE, ZDNN_INVALID_SHAPE, true);
}
void test_zdnn_ok() { try_diag(ZDNN_OK, ZDNN_OK, true); }
void test_not_match1() { try_diag(ZDNN_INVALID_SHAPE, ZDNN_OK, false); }
void test_not_match2() {
try_diag(ZDNN_INVALID_SHAPE, ZDNN_INVALID_FORMAT, false);
}
int main() {
UNITY_BEGIN();
RUN_TEST(test_not_match1);
RUN_TEST(test_not_match2);
RUN_TEST(test_real_error);
RUN_TEST(test_zdnn_ok);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_stickify.c 0000664 0000000 0000000 00000210303 15000221702 0017656 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "testsupport.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
//=================================================================================================
// tests for stickify
void test_stickify(uint32_t dim4, uint32_t dim3, uint32_t dim2, uint32_t dim1,
zdnn_data_layouts layout) {
/*
Use 1x4x4x1 as example:
1) Create the input tensor descriptor
2) Create the raw (i.e., dense) input tensor data with random
FP16/FP32/BFLOAT values 1 >= x > SMALLEST_RANDOM_FP.
For 1x4x4x1 we have 16 elements.
3) Stickify the data to ztensor. Now ztensor.buffer has 16 DLFLOAT16
elements with all the necessary paddings.
4) get the array of address offsets where the values are expected to be in
the stickified buffer.
5) Perform the check:
fp16_to_dlf16(input_data[n]) == output_data[n]
(i.e., stick_area[offsets[n]])?
*/
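/*
For the 1x4x4x1 example above the stickified buffer spans 4 pages
(4 x 4096 bytes) yet carries only 16 two-byte DLFLOAT16 values; every other
cell is padding, which is why the check below walks the offsets returned by
alloc_offsets() rather than scanning the buffer linearly.
*/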
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
void *data;
switch (layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
dim1);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim2,
dim1);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim3,
dim2, dim1);
break;
default:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim4,
dim3, dim2, dim1);
}
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed (status = %08x)", status);
data = create_and_fill_random_fp_data(&ztensor);
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_ztensor() failed, status = %08x "
"(%s)",
status, zdnn_get_status_message(status));
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("%s(): dumpdata_origtensor\n", __func__);
dumpdata_origtensor(&pre_tfrmd_desc, data, AS_HEX);
dumpdata_origtensor(&pre_tfrmd_desc, data, AS_FLOAT);
printf("%s(): dumpdata_ztensor\n", __func__);
dumpdata_ztensor(&ztensor, AS_HEX, false);
dumpdata_ztensor(&ztensor, AS_FLOAT, false);
}
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
size_t *offsets = alloc_offsets(&ztensor);
for (uint64_t i = 0; i < num_elements; i++) {
// value in stick area, stickified
uint16_t output_stickified_value =
*(uint16_t *)((uintptr_t)ztensor.buffer + offsets[i]);
// input value converted to DLFLOAT16, this is the "expected" value
uint16_t stickified_input_value = 0;
switch (test_datatype) {
case BFLOAT:
stickified_input_value = cnvt_1_bfloat_to_dlf16(((uint16_t *)data)[i]);
break;
case FP16:
stickified_input_value = cnvt_1_fp16_to_dlf16(((uint16_t *)data)[i]);
break;
case FP32:
stickified_input_value = cnvt_1_fp32_to_dlf16(((float *)data)[i]);
break;
default:
TEST_FAIL_MESSAGE("Unsupported data type");
return;
}
TEST_ASSERT_MESSAGE_FORMATTED(
almost_equal_dlf16(output_stickified_value, stickified_input_value),
"Incorrect value at element %" PRIu64 ": Stickified: "
"%.6f, Expected: %.6f",
i, cnvt_1_dlf16_to_fp32(output_stickified_value),
cnvt_1_dlf16_to_fp32(stickified_input_value));
}
// Free allocated storage
free(offsets);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
/**************************************************************
* NHWC
**************************************************************/
/*
* Tensor with 16 entries, NHWC
* 1,4,4,1 NHWC will use one cell per stick, 4 sticks per page and a total of 4
* pages
*
* [0, 128, 256, 384, (H = 0)
* 4096, 4224, 4352, 4480, (H = 1)
* 8192, 8320, 8448, 8576, (H = 2)
* 12288, 12416, 12544, 12672] (H = 3)
*/
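/*
* Those offsets follow directly from the stick geometry used throughout this
* file: a stick holds 64 two-byte cells (128 bytes) and a page holds 32
* sticks (4096 bytes), so for this particular shape (C = 1) each (h, w)
* element starts its own stick at offset = h * 4096 + w * 128
* (e.g. H = 2, W = 3 -> 8576 above).
*/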
void test_nhwc_1x4x4x1() { test_stickify(1, 4, 4, 1, ZDNN_NHWC); };
void test_nhwc_1x4x4x2() { test_stickify(1, 4, 4, 2, ZDNN_NHWC); };
void test_nhwc_1x32x32x1() { test_stickify(1, 32, 32, 1, ZDNN_NHWC); };
void test_nhwc_1x32x32x2() { test_stickify(1, 32, 32, 2, ZDNN_NHWC); };
/*
* 3K entries in tensor, send to NHWC sticks
* Each stick uses 3 cells, and all 32 sticks of the page are used.
* 32 pages are used to store the values.
*/
void test_nhwc_1x32x32x3() { test_stickify(1, 32, 32, 3, ZDNN_NHWC); };
void test_nhwc_1x1x2x1() { test_stickify(1, 1, 2, 1, ZDNN_NHWC); };
void test_nhwc_1x1x2x2() { test_stickify(1, 1, 2, 2, ZDNN_NHWC); };
void test_nhwc_1x1x2x4() { test_stickify(1, 1, 2, 4, ZDNN_NHWC); };
void test_nhwc_1x1x2x7() { test_stickify(1, 1, 2, 7, ZDNN_NHWC); };
void test_nhwc_1x1x4x1() { test_stickify(1, 1, 4, 1, ZDNN_NHWC); };
void test_nhwc_1x1x4x2() { test_stickify(1, 1, 4, 2, ZDNN_NHWC); };
void test_nhwc_1x1x4x4() { test_stickify(1, 1, 4, 4, ZDNN_NHWC); };
void test_nhwc_1x1x4x7() { test_stickify(1, 1, 4, 7, ZDNN_NHWC); };
void test_nhwc_1x1x7x1() { test_stickify(1, 1, 7, 1, ZDNN_NHWC); };
void test_nhwc_1x1x7x2() { test_stickify(1, 1, 7, 2, ZDNN_NHWC); };
void test_nhwc_1x1x7x4() { test_stickify(1, 1, 7, 4, ZDNN_NHWC); };
void test_nhwc_1x1x7x7() { test_stickify(1, 1, 7, 7, ZDNN_NHWC); };
void test_nhwc_1x1x8x1() { test_stickify(1, 1, 8, 1, ZDNN_NHWC); };
void test_nhwc_1x1x8x2() { test_stickify(1, 1, 8, 2, ZDNN_NHWC); };
void test_nhwc_1x1x8x4() { test_stickify(1, 1, 8, 4, ZDNN_NHWC); };
void test_nhwc_1x1x8x7() { test_stickify(1, 1, 8, 7, ZDNN_NHWC); };
void test_nhwc_1x1x13x1() { test_stickify(1, 1, 13, 1, ZDNN_NHWC); };
void test_nhwc_1x1x13x2() { test_stickify(1, 1, 13, 2, ZDNN_NHWC); };
void test_nhwc_1x1x13x4() { test_stickify(1, 1, 13, 4, ZDNN_NHWC); };
void test_nhwc_1x1x13x7() { test_stickify(1, 1, 13, 7, ZDNN_NHWC); };
void test_nhwc_1x1x100x1() { test_stickify(1, 1, 100, 1, ZDNN_NHWC); };
void test_nhwc_1x1x100x2() { test_stickify(1, 1, 100, 2, ZDNN_NHWC); };
void test_nhwc_1x1x100x4() { test_stickify(1, 1, 100, 4, ZDNN_NHWC); };
void test_nhwc_1x1x100x7() { test_stickify(1, 1, 100, 7, ZDNN_NHWC); };
void test_nhwc_2x3x2x1() { test_stickify(2, 3, 2, 1, ZDNN_NHWC); };
void test_nhwc_2x3x2x2() { test_stickify(2, 3, 2, 2, ZDNN_NHWC); };
void test_nhwc_2x3x2x4() { test_stickify(2, 3, 2, 4, ZDNN_NHWC); };
void test_nhwc_2x3x2x7() { test_stickify(2, 3, 2, 7, ZDNN_NHWC); };
void test_nhwc_2x3x4x1() { test_stickify(2, 3, 4, 1, ZDNN_NHWC); };
void test_nhwc_2x3x4x2() { test_stickify(2, 3, 4, 2, ZDNN_NHWC); };
void test_nhwc_2x3x4x4() { test_stickify(2, 3, 4, 4, ZDNN_NHWC); };
void test_nhwc_2x3x4x7() { test_stickify(2, 3, 4, 7, ZDNN_NHWC); };
void test_nhwc_2x3x7x1() { test_stickify(2, 3, 7, 1, ZDNN_NHWC); };
void test_nhwc_2x3x7x2() { test_stickify(2, 3, 7, 2, ZDNN_NHWC); };
void test_nhwc_2x3x7x4() { test_stickify(2, 3, 7, 4, ZDNN_NHWC); };
void test_nhwc_2x3x7x7() { test_stickify(2, 3, 7, 7, ZDNN_NHWC); };
void test_nhwc_2x3x8x1() { test_stickify(2, 3, 8, 1, ZDNN_NHWC); };
void test_nhwc_2x3x8x2() { test_stickify(2, 3, 8, 2, ZDNN_NHWC); };
void test_nhwc_2x3x8x4() { test_stickify(2, 3, 8, 4, ZDNN_NHWC); };
void test_nhwc_2x3x8x7() { test_stickify(2, 3, 8, 7, ZDNN_NHWC); };
void test_nhwc_2x3x13x1() { test_stickify(2, 3, 13, 1, ZDNN_NHWC); };
void test_nhwc_2x3x13x2() { test_stickify(2, 3, 13, 2, ZDNN_NHWC); };
void test_nhwc_2x3x13x4() { test_stickify(2, 3, 13, 4, ZDNN_NHWC); };
void test_nhwc_2x3x13x7() { test_stickify(2, 3, 13, 7, ZDNN_NHWC); };
void test_nhwc_2x3x100x1() { test_stickify(2, 3, 100, 1, ZDNN_NHWC); };
void test_nhwc_2x3x100x2() { test_stickify(2, 3, 100, 2, ZDNN_NHWC); };
void test_nhwc_2x3x100x4() { test_stickify(2, 3, 100, 4, ZDNN_NHWC); };
void test_nhwc_2x3x100x7() { test_stickify(2, 3, 100, 7, ZDNN_NHWC); };
void test_nhwc_3x2x2x1() { test_stickify(3, 2, 2, 1, ZDNN_NHWC); };
void test_nhwc_3x2x2x2() { test_stickify(3, 2, 2, 2, ZDNN_NHWC); };
void test_nhwc_3x2x2x4() { test_stickify(3, 2, 2, 4, ZDNN_NHWC); };
void test_nhwc_3x2x2x7() { test_stickify(3, 2, 2, 7, ZDNN_NHWC); };
void test_nhwc_3x2x4x1() { test_stickify(3, 2, 4, 1, ZDNN_NHWC); };
void test_nhwc_3x2x4x2() { test_stickify(3, 2, 4, 2, ZDNN_NHWC); };
void test_nhwc_3x2x4x4() { test_stickify(3, 2, 4, 4, ZDNN_NHWC); };
void test_nhwc_3x2x4x7() { test_stickify(3, 2, 4, 7, ZDNN_NHWC); };
void test_nhwc_3x2x7x1() { test_stickify(3, 2, 7, 1, ZDNN_NHWC); };
void test_nhwc_3x2x7x2() { test_stickify(3, 2, 7, 2, ZDNN_NHWC); };
void test_nhwc_3x2x7x4() { test_stickify(3, 2, 7, 4, ZDNN_NHWC); };
void test_nhwc_3x2x7x7() { test_stickify(3, 2, 7, 7, ZDNN_NHWC); };
void test_nhwc_3x2x8x1() { test_stickify(3, 2, 8, 1, ZDNN_NHWC); };
void test_nhwc_3x2x8x2() { test_stickify(3, 2, 8, 2, ZDNN_NHWC); };
void test_nhwc_3x2x8x4() { test_stickify(3, 2, 8, 4, ZDNN_NHWC); };
void test_nhwc_3x2x8x7() { test_stickify(3, 2, 8, 7, ZDNN_NHWC); };
void test_nhwc_3x2x13x1() { test_stickify(3, 2, 13, 1, ZDNN_NHWC); };
void test_nhwc_3x2x13x2() { test_stickify(3, 2, 13, 2, ZDNN_NHWC); };
void test_nhwc_3x2x13x4() { test_stickify(3, 2, 13, 4, ZDNN_NHWC); };
void test_nhwc_3x2x13x7() { test_stickify(3, 2, 13, 7, ZDNN_NHWC); };
void test_nhwc_3x2x100x1() { test_stickify(3, 2, 100, 1, ZDNN_NHWC); };
void test_nhwc_3x2x100x2() { test_stickify(3, 2, 100, 2, ZDNN_NHWC); };
void test_nhwc_3x2x100x4() { test_stickify(3, 2, 100, 4, ZDNN_NHWC); };
void test_nhwc_3x2x100x7() { test_stickify(3, 2, 100, 7, ZDNN_NHWC); };
/*
* This routine is a generic test routine, allowing various 'e1' values
* to be input. It tests stickification conversion (X -> DLFLOAT).
* It assumes the e4-e2 values are 1 in order to
* allow simpler assignment of the "offset" variable for
* examining values stored in the stick. e1 can range from 1 to 128,
* i.e. one or two pages, with at most 64 values per stick.
*/
void test_nhwc_1x1x1xe1(uint32_t e1) { test_stickify(1, 1, 1, e1, ZDNN_NHWC); }
void test_nhwc_1x1x1x4() { test_nhwc_1x1x1xe1(4); }
void test_nhwc_1x1x1x5() { test_nhwc_1x1x1xe1(5); }
void test_nhwc_1x1x1x8() { test_nhwc_1x1x1xe1(8); }
void test_nhwc_1x1x1x9() { test_nhwc_1x1x1xe1(9); }
void test_nhwc_1x1x1x63() { test_nhwc_1x1x1xe1(63); }
void test_nhwc_1x1x1x64() { test_nhwc_1x1x1xe1(64); }
void test_nhwc_1x1x1x65() { test_nhwc_1x1x1xe1(65); }
void test_nhwc_1x1x1x127() { test_nhwc_1x1x1xe1(127); }
void test_nhwc_1x1x1x128() { test_nhwc_1x1x1xe1(128); }
void test_nhwc_1x2x3x4() { test_stickify(1, 2, 3, 4, ZDNN_NHWC); }
void test_nhwc_1x1x31x64() { test_stickify(1, 1, 31, 64, ZDNN_NHWC); }
void test_nhwc_1x1x32x64() { test_stickify(1, 1, 32, 64, ZDNN_NHWC); }
void test_nhwc_1x1x33x64() { test_stickify(1, 1, 33, 64, ZDNN_NHWC); }
void test_nhwc_1x1x32x63() { test_stickify(1, 1, 32, 63, ZDNN_NHWC); }
void test_nhwc_1x1x32x65() { test_stickify(1, 1, 32, 65, ZDNN_NHWC); }
void test_nhwc_1x1x4x127() { test_stickify(1, 1, 4, 127, ZDNN_NHWC); }
void test_nhwc_1x1x4x128() { test_stickify(1, 1, 4, 128, ZDNN_NHWC); }
void test_nhwc_1x1x4x129() { test_stickify(1, 1, 4, 129, ZDNN_NHWC); }
void test_nhwc_1x1x63x4() { test_stickify(1, 1, 63, 4, ZDNN_NHWC); }
void test_nhwc_1x1x64x4() { test_stickify(1, 1, 64, 4, ZDNN_NHWC); }
void test_nhwc_1x1x65x4() { test_stickify(1, 1, 65, 4, ZDNN_NHWC); }
void test_nhwc_2x3x33x129() { test_stickify(2, 3, 33, 129, ZDNN_NHWC); }
/*
* Tensor with 16 entries, 3DS
* 4,4,1 3DS will use one cell per stick, 4 sticks per page and a total of 4
* pages.
*/
void test_3ds_4x4x1() {
// first entry doesn't matter
test_stickify(9999, 4, 4, 1, ZDNN_3DS);
}
/*
* 3K entries in tensor, send to 3DS sticks
* Each stick uses 3 cells, and all 32 sticks of the page are used.
* 32 pages are used to store the values.
*
*/
void test_3ds_32x32x3() {
// first entry doesn't matter
test_stickify(9999, 32, 32, 3, ZDNN_3DS);
}
/*
* Tensor with 8 entries, 2DS
* 4,2 2DS will use two cells per stick, (implied 1 stick per page) and a total
* of 4 pages.
*/
void test_2ds_4x2() {
// first two entries don't matter in 2DS
test_stickify(9999, 9999, 4, 2, ZDNN_2DS);
}
/*
* Tensor with 4k entries, 2DS
* We expect this to require 4 pages total. Each dim2 will require 2 pages.
* The first page will have all 64 cells of all 32 sticks filled holding 2048
* values. A second page will have 1 stick with 1 cell filled to hold val 2049.
*/
void test_2ds_2x2049() {
// first two entries don't matter in 2DS
test_stickify(9999, 9999, 2, 2049, ZDNN_2DS);
}
void test_concat_stickify(zdnn_concat_info info, uint32_t dim3, uint32_t dim2,
uint32_t dim1) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
void *data[] = {NULL, NULL, NULL, NULL};
uint8_t num_concats = 0;
if (CONCAT_RNN_TYPE(info) == RNN_TYPE_LSTM) {
num_concats = 4;
} else if (CONCAT_RNN_TYPE(info) == RNN_TYPE_GRU) {
num_concats = 3;
} else {
TEST_FAIL_MESSAGE_FORMATTED("bad concat info: %08x\n", info);
}
// Fill in pre_transformed_desc. If dim3 is set, we're concatenating a 3DS
// tensor; otherwise assume 2DS.
if (dim3) {
// Initialize tensor descriptor
zdnn_init_pre_transformed_desc(ZDNN_3DS, test_datatype, &pre_tfrmd_desc,
dim3, dim2, dim1);
} else {
zdnn_init_pre_transformed_desc(ZDNN_2DS, test_datatype, &pre_tfrmd_desc,
dim2, dim1);
}
// Fill in transformed_desc.
status = zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc_concatenated() failed, status = %08x "
"(%s) (concat info = %08x)",
status, zdnn_get_status_message(status), info);
// Create ztensor and allocate space for its buffer
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed, "
"status = %08x (%s) (concat info = %08x)",
status, zdnn_get_status_message(status), info);
// Fill in random data for each gate's original values
for (uint8_t i = 0; i < num_concats; i++) {
data[i] = create_and_fill_random_fp_data(&ztensor);
}
// Transform the original data values into the stickified ztensor
switch (num_concats) {
case 4:
status =
zdnn_transform_ztensor(&ztensor, data[0], data[1], data[2], data[3]);
break;
case 3:
status = zdnn_transform_ztensor(&ztensor, data[0], data[1], data[2]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("num_concats of %d is not supported (concat "
"info = %08x)",
num_concats, info);
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_ztensor() failed, status = %08x "
"(%s) (concat info = %08x)",
status, zdnn_get_status_message(status), info);
// Print the original data and stickified buffer
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
// Each gate will have its own input data so dump each one. Each will
// have the same dimensions/pre-tfrmd_desc
for (uint8_t i = 0; i < num_concats; i++) {
printf("%s(): dumpdata_origtensor for gate %d\n", __func__, i);
dumpdata_origtensor(ztensor.pre_transformed_desc, data[i], AS_HEX);
dumpdata_origtensor(ztensor.pre_transformed_desc, data[i], AS_FLOAT);
}
// The gates are concatenated into one ztensor so there's only one to dump
printf("%s(): dumpdata_ztensor (concatenated)\n", __func__);
dumpdata_ztensor(&ztensor, AS_HEX, false);
dumpdata_ztensor(&ztensor, AS_FLOAT, false);
}
uint64_t elements_per_concat =
get_num_elements(&ztensor, ELEMENTS_PRE_SINGLE_GATE);
uint64_t slices_per_concat = ztensor.transformed_desc->dim4;
uint64_t elements_per_concat_slice = elements_per_concat / slices_per_concat;
LOG_DEBUG("elements_per_concat = %ld, slices_per_concat = %ld, "
"elements_per_concat_slice = %ld",
elements_per_concat, slices_per_concat, elements_per_concat_slice);
size_t *offsets = alloc_rnn_offsets(&ztensor);
uint16_t input_stickified_value = 0;
uint16_t output_stickified_value;
uint32_t offset_index = 0;
// Loop through each offset in order and confirm the stickified value there
// matches the correct original input value. The loop handles the difference
// in output vs input element order caused by support of ztensor slicing.
for (uint32_t slice = 0; slice < ztensor.transformed_desc->dim4; slice++) {
size_t slice_offset =
slice * elements_per_concat_slice * get_data_type_size(test_datatype);
for (uint32_t concat = 0; concat < num_concats; concat++) {
void *concat_slice_data =
(void *)((uintptr_t)data[concat] + slice_offset);
for (uint32_t elm_i = 0; elm_i < elements_per_concat_slice; elm_i++) {
output_stickified_value =
*(uint16_t *)((uintptr_t)ztensor.buffer + offsets[offset_index]);
switch (test_datatype) {
// Convert input to stickified values for comparison to output.
case BFLOAT:
input_stickified_value =
cnvt_1_bfloat_to_dlf16(((uint16_t *)concat_slice_data)[elm_i]);
LOG_TRACE(
"offsets %d (native %s) = %04x vs %04x for input from "
"slice %d of concat %d at element index %d (%s converted to %s)",
offset_index, get_data_type_str(ZDNN_DLFLOAT16),
output_stickified_value, input_stickified_value, slice, concat,
elm_i, get_data_type_str(test_datatype),
get_data_type_str(ZDNN_DLFLOAT16));
break;
case FP16:
input_stickified_value =
cnvt_1_fp16_to_dlf16(((uint16_t *)concat_slice_data)[elm_i]);
LOG_TRACE(
"offsets %d (native %s) = %04x vs %04x for input from "
"slice %d of concat %d at element index %d (%s converted to %s)",
offset_index, get_data_type_str(ZDNN_DLFLOAT16),
output_stickified_value, input_stickified_value, slice, concat,
elm_i, get_data_type_str(test_datatype),
get_data_type_str(ZDNN_DLFLOAT16));
break;
case FP32:
input_stickified_value =
cnvt_1_fp32_to_dlf16(((float *)concat_slice_data)[elm_i]);
LOG_TRACE(
"offsets %d (%s converted to %s) = %4f vs %4f for input from "
"slice %d of concat %d at element index %d (native %s)",
offset_index, get_data_type_str(ZDNN_DLFLOAT16),
get_data_type_str(test_datatype),
cnvt_1_dlf16_to_fp32(output_stickified_value),
((float *)concat_slice_data)[elm_i], slice, concat, elm_i,
get_data_type_str(test_datatype));
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("Unsupported data type %d (%s)",
test_datatype,
get_data_type_str(test_datatype));
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(
output_stickified_value == input_stickified_value,
"offsets %u = %04x (native %s) but expected %04x (%s "
"converted to %s)",
offset_index, output_stickified_value,
get_data_type_str(ZDNN_DLFLOAT16), input_stickified_value,
get_data_type_str(test_datatype),
get_data_type_str(ZDNN_DLFLOAT16));
offset_index++;
}
}
}
// Free allocated storage
free(offsets);
for (uint8_t i = 0; i < num_concats; i++) {
free(data[i]);
}
zdnn_free_ztensor_buffer(&ztensor);
}
/*
* Create a FICO bias ztensor with 16 entries:
* 4 gates each having 1 direction each having 4 elements
*/
void test_lstm_biases_1x4() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j], 0,
1, 4);
}
}
}
/*
* Create a FICO bias ztensor with 32 entries:
* 4 gates each having 2 directions each having 4 elements
*/
void test_lstm_biases_2x4() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j], 0,
2, 4);
}
}
}
/*
* Create a FICO bias ztensor with 520 entries:
* 4 gates each having 2 directions each having 65 elements
*/
void test_lstm_biases_2x65() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j], 0,
2, 65);
}
}
}
/*
* Create a FICO bias ztensor with 16392 entries:
* 4 gates each having 2 directions each having 2049 elements
* 2049 = 64 max cells per stick * 32 max sticks per page + 1. This means each
* direction will require two 4K pages to stickify.
*/
void test_lstm_biases_2x2049() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j], 0,
2, 2049);
}
}
}
/*
* Create a FICO weights ztensor (PREV_LAYER_UNI) with 48 entries:
* 4 gates each having 1 direction each having 3 rows with 4 elements
*/
void test_lstm_no_vconcat_weights_1x3x4() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS, 1, 3, 4);
}
/*
* Create a FICO weights ztensor (PREV_LAYER_UNI) with 96 entries:
* 4 gates each having 2 directions each having 3 rows with 4 elements
*/
void test_lstm_no_vconcat_weights_2x3x4() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS, 2, 3, 4);
}
/*
* Create a FICO weights ztensor (PREV_LAYER_UNI) with 17160 entries:
* 4 gates each having 2 directions each having 33 rows with 65 elements
* Each direction will require two 4k pages to stickify as each stick has a max
* of 64 elements and each page has a max of 32 sticks.
*/
void test_lstm_no_vconcat_weights_2x33x65() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_UNI | USAGE_WEIGHTS, 2, 33,
65);
}
/*
* Create a FICO weights ztensor (PREV_LAYER_BIDIR) with 96 entries:
* 4 gates each having 1 direction each having 6 rows with 4 elements
*/
void test_lstm_prev_bidir_weights_1x6x4() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 1, 6,
4);
}
/*
* Create a FICO weights ztensor (PREV_LAYER_BIDIR) with 192 entries:
* 4 gates each having 2 directions each having 6 rows with 4 elements
*/
void test_lstm_prev_bidir_weights_2x6x4() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 2, 6,
4);
}
/*
* Create a FICO weights ztensor (PREV_LAYER_BIDIR) with 34320 entries:
* 4 gates each having 2 directions each having 66 rows with 65 elements
* Each direction will require eight 4k pages to stickify as each stick has a
* max of 64 elements and each page has a max of 32 sticks.
*/
void test_lstm_prev_bidir_weights_2x66x65() {
test_concat_stickify(RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 2, 66,
65);
}
/*
* Create a GRU bias ztensor with 12 entries:
* 3 gates each having 1 direction each having 4 elements
*/
void test_gru_biases_1x4() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_GRU | prev_layers[i] | biases_usages[j], 0,
1, 4);
}
}
}
/*
* Create a GRU bias ztensor with 24 entries:
* 3 gates each having 2 directions each having 4 elements
*/
void test_gru_biases_2x4() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_GRU | prev_layers[i] | biases_usages[j], 0,
2, 4);
}
}
}
/*
* Create a GRU bias ztensor with 390 entries:
* 3 gates each having 2 directions each having 65 elements
*/
void test_gru_biases_2x65() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_GRU | prev_layers[i] | biases_usages[j], 0,
2, 65);
}
}
}
/*
* Create a GRU bias ztensor with 12294 entries:
* 3 gates each having 2 directions each having 2049 elements
* 2049 = 64 max cells per stick * 32 max sticks per page + 1. This means each
* direction will require two 4K pages to stickify.
*/
void test_gru_biases_2x2049() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_concat_stickify(RNN_TYPE_GRU | prev_layers[i] | biases_usages[j], 0,
2, 2049);
}
}
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_UNI) with 36 entries:
* 3 gates each having 1 direction each having 3 rows with 4 elements
*/
void test_gru_no_vconcat_weights_1x3x4() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_WEIGHTS, 1, 3, 4);
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_UNI) with 72 entries:
* 3 gates each having 2 directions each having 3 rows with 4 elements
*/
void test_gru_no_vconcat_weights_2x3x4() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_WEIGHTS, 2, 3, 4);
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_UNI) with 12870 entries:
* 3 gates each having 2 directions each having 33 rows with 65 elements
* Each direction will require two 4k pages to stickify as each stick has a max
* of 64 elements and each page has a max of 32 sticks.
*/
void test_gru_no_vconcat_weights_2x33x65() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_UNI | USAGE_WEIGHTS, 2, 33,
65);
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_BIDIR) with 72 entries:
* 3 gates each having 1 direction each having 6 rows with 4 elements
*/
void test_gru_prev_bidir_weights_1x6x4() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 1, 6,
4);
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_BIDIR) with 144 entries:
* 3 gates each having 2 directions each having 6 rows with 4 elements
*/
void test_gru_prev_bidir_weights_2x6x4() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 2, 6,
4);
}
/*
* Create a ZRH weights ztensor (PREV_LAYER_BIDIR) with 25740 entries:
* 3 gates each having 2 directions each having 66 rows with 65 elements
* Each direction will require six 4k pages to stickify as each stick has a max
* of 64 elements and each page has a max of 32 sticks.
*/
void test_gru_prev_bidir_weights_2x66x65() {
test_concat_stickify(RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS, 2, 66,
65);
}
void test_concat_weights_dim2(zdnn_concat_info info, uint32_t dim3,
uint32_t dim2, uint32_t dim1,
zdnn_status exp_status) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
void *data[] = {NULL, NULL, NULL, NULL};
uint8_t num_concats = 0;
if (CONCAT_RNN_TYPE(info) == RNN_TYPE_LSTM) {
num_concats = 4;
} else if (CONCAT_RNN_TYPE(info) == RNN_TYPE_GRU) {
num_concats = 3;
} else {
TEST_FAIL_MESSAGE_FORMATTED("bad concat info: %08x\n", info);
}
// if dim2 is an odd number coming in, add 1 so we create a valid dim2 and
// a valid ztensor with it; else use it as is
zdnn_init_pre_transformed_desc(ZDNN_3DS, test_datatype, &pre_tfrmd_desc, dim3,
((dim2 & 1) ? dim2 + 1 : dim2), dim1);
status = zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc_concatenated() failed, status = %08x "
"(%s) (concat info = %08x)",
status, zdnn_get_status_message(status), info);
// Create ztensor and allocate space for its buffer
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed, "
"status = %08x (%s) (concat info = %08x)",
status, zdnn_get_status_message(status), info);
// Fill in random data for each gate's original values
for (uint8_t i = 0; i < num_concats; i++) {
data[i] = create_and_fill_random_fp_data(&ztensor);
}
// put back the incoming dim2 into pre-transformed desc as caller intended
ztensor.pre_transformed_desc->dim2 = dim2;
// Transform the original data values into the stickified ztensor
switch (num_concats) {
case 4:
status =
zdnn_transform_ztensor(&ztensor, data[0], data[1], data[2], data[3]);
break;
case 3:
status = zdnn_transform_ztensor(&ztensor, data[0], data[1], data[2]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"num_concats of %d is not supported (concat info = %08x)", num_concats,
info);
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status,
"zdnn_transform_origtensor() unexpected status (status = %08x, "
"expects = %08x)",
status, exp_status);
for (uint8_t i = 0; i < num_concats; i++) {
free(data[i]);
}
zdnn_free_ztensor_buffer(&ztensor);
}
void test_lstm_no_vconcat_weights_odd_dim2_pass() {
test_concat_weights_dim2(RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_UNI, 3, 9,
10, ZDNN_OK);
}
void test_lstm_prev_bidir_weights_odd_dim2_fail() {
test_concat_weights_dim2(PREV_LAYER_BIDIR | RNN_TYPE_LSTM | USAGE_WEIGHTS, 3,
9, 10, ZDNN_INVALID_SHAPE);
}
void test_gru_no_vconcat_weights_odd_dim2_pass() {
  test_concat_weights_dim2(RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_UNI, 3, 9,
10, ZDNN_OK);
}
void test_gru_prev_bidir_weights_odd_dim2_fail() {
test_concat_weights_dim2(RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_BIDIR, 3,
9, 10, ZDNN_INVALID_SHAPE);
}
/**************************************************************
* NCHW
**************************************************************/
void test_nchw_1x1x4x4() { test_stickify(1, 1, 4, 4, ZDNN_NCHW); }
void test_nchw_1x4x2x3() { test_stickify(1, 4, 2, 3, ZDNN_NCHW); }
void test_nchw_1x3x32x32() { test_stickify(1, 3, 32, 32, ZDNN_NCHW); }
void test_nchw_2x129x3x33() { test_stickify(2, 129, 3, 33, ZDNN_NCHW); }
void test_nchw_1x64x1x31() { test_stickify(1, 64, 1, 31, ZDNN_NCHW); }
void test_nchw_1x64x1x32() { test_stickify(1, 64, 1, 32, ZDNN_NCHW); }
void test_nchw_1x64x1x33() { test_stickify(1, 64, 1, 33, ZDNN_NCHW); }
void test_nchw_1x63x1x32() { test_stickify(1, 63, 1, 32, ZDNN_NCHW); }
void test_nchw_1x65x1x32() { test_stickify(1, 65, 1, 32, ZDNN_NCHW); }
void test_nchw_1x127x1x4() { test_stickify(1, 127, 1, 4, ZDNN_NCHW); }
void test_nchw_1x128x1x4() { test_stickify(1, 128, 1, 4, ZDNN_NCHW); }
void test_nchw_1x129x1x4() { test_stickify(1, 129, 1, 4, ZDNN_NCHW); }
void test_nchw_1x4x1x63() { test_stickify(1, 4, 1, 63, ZDNN_NCHW); }
void test_nchw_1x4x1x64() { test_stickify(1, 4, 1, 64, ZDNN_NCHW); }
void test_nchw_1x4x1x65() { test_stickify(1, 4, 1, 65, ZDNN_NCHW); }
/* create an NHWC input tensor data stream, then create an NCHW copy of it via
 * matrix-rotate, then stickify both. Compare the stickified data areas via
 * memcmp; they should match 100% */
void nhwc_nchw_comp(uint32_t n, uint32_t h, uint32_t w, uint32_t c) {
zdnn_tensor_desc pre_tfrmd_desc_nhwc, pre_tfrmd_desc_nchw;
zdnn_tensor_desc tfrmd_desc_nhwc, tfrmd_desc_nchw;
zdnn_ztensor ztensor_nhwc, ztensor_nchw;
zdnn_status status;
void *data_nhwc, *data_nchw;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &pre_tfrmd_desc_nhwc,
n, h, w, c);
zdnn_init_pre_transformed_desc(ZDNN_NCHW, test_datatype, &pre_tfrmd_desc_nchw,
n, c, h, w);
zdnn_generate_transformed_desc(&pre_tfrmd_desc_nhwc, &tfrmd_desc_nhwc);
zdnn_generate_transformed_desc(&pre_tfrmd_desc_nchw, &tfrmd_desc_nchw);
status = zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc_nhwc, &tfrmd_desc_nhwc,
&ztensor_nhwc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc NHWC failed (status = %08x)", status);
status = zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc_nchw, &tfrmd_desc_nchw,
&ztensor_nchw);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc NCHW failed (status = %08x)", status);
// create NHWC data stream, then matrix-rotate it to NCHW to another data
// stream
data_nhwc = create_and_fill_random_fp_data(&ztensor_nhwc);
data_nchw = malloc(pre_tfrmd_desc_nhwc.dim4 * pre_tfrmd_desc_nhwc.dim3 *
pre_tfrmd_desc_nhwc.dim2 * pre_tfrmd_desc_nhwc.dim1 *
get_data_type_size(pre_tfrmd_desc_nhwc.type));
nhwc_2_nchw(data_nhwc, n, h, w, c,
get_data_type_size(pre_tfrmd_desc_nhwc.type), data_nchw);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("NHWC DATA "
"================================================================="
"\n");
dumpdata_origtensor(&pre_tfrmd_desc_nhwc, data_nhwc, AS_FLOAT);
printf("NCHW DATA "
"================================================================="
"\n");
dumpdata_origtensor(&pre_tfrmd_desc_nchw, data_nchw, AS_FLOAT);
}
memset(ztensor_nhwc.buffer, 0, ztensor_nhwc.buffer_size);
memset(ztensor_nchw.buffer, 0, ztensor_nchw.buffer_size);
status = zdnn_transform_ztensor(&ztensor_nhwc, data_nhwc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor NHWC failed (status = %08x)",
status);
status = zdnn_transform_ztensor(&ztensor_nchw, data_nchw);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor NCHW failed (status = %08x)",
status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("NHWC STICK "
"================================================================="
"\n");
dumpdata_ztensor(&ztensor_nhwc, AS_FLOAT, false);
printf("NCHW STICK "
"================================================================="
"\n");
dumpdata_ztensor(&ztensor_nchw, AS_FLOAT, false);
}
TEST_ASSERT_MESSAGE(memcmp(ztensor_nchw.buffer, ztensor_nhwc.buffer,
ztensor_nhwc.buffer_size) == 0,
"Stickified NHWC and NCHW don't match");
  // Free allocated storage
  free(data_nhwc);
  free(data_nchw);
  zdnn_free_ztensor_buffer(&ztensor_nhwc);
  zdnn_free_ztensor_buffer(&ztensor_nchw);
}
void test_nhwc_nchw_comp_1x4x4x1() { nhwc_nchw_comp(1, 4, 4, 1); }
void test_nhwc_nchw_comp_1x32x32x3() { nhwc_nchw_comp(1, 32, 32, 3); }
void test_nhwc_nchw_comp_2x3x33x129() { nhwc_nchw_comp(2, 3, 33, 129); }
// Reuse zdnn_ztensor after calling zdnn_reset_ztensor() between transforms,
// expects ZDNN_OK on both transforms
void test_ztensor_reuse_with_reset() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
unsigned char *data, *data2;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 4, 4, 1);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(ZDNN_OK == zdnn_init_ztensor_with_malloc(
&pre_tfrmd_desc, &tfrmd_desc, &ztensor),
"Unsuccessful zdnn_init_ztensor_with_malloc");
data = create_and_fill_random_fp_data(&ztensor);
data2 = create_and_fill_random_fp_data(&ztensor);
TEST_ASSERT_MESSAGE(ZDNN_OK == zdnn_transform_ztensor(&ztensor, data),
"Unsuccessful first zdnn_transform_ztensor");
zdnn_reset_ztensor(&ztensor);
TEST_ASSERT_MESSAGE(ZDNN_OK == zdnn_transform_ztensor(&ztensor, data2),
"Unsuccessful second zdnn_transform_ztensor");
// Free allocated storage
free(data);
free(data2);
zdnn_free_ztensor_buffer(&ztensor);
}
// Reuse zdnn_ztensor without resetting is_transformed, expects
// ZDNN_INVALID_STATE on the second transform
void test_ztensor_reuse_without_reset() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
unsigned char *data, *data2;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 4, 4, 1);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(ZDNN_OK == zdnn_init_ztensor_with_malloc(
&pre_tfrmd_desc, &tfrmd_desc, &ztensor),
"Unsuccessful zdnn_init_ztensor_with_malloc");
data = create_and_fill_random_fp_data(&ztensor);
data2 = create_and_fill_random_fp_data(&ztensor);
TEST_ASSERT_MESSAGE(ZDNN_OK == zdnn_transform_ztensor(&ztensor, data),
"Unsuccessful first zdnn_transform_ztensor");
TEST_ASSERT_MESSAGE(
ZDNN_INVALID_STATE == zdnn_transform_ztensor(&ztensor, data2),
"Second zdnn_transform_ztensor does not yield ZDNN_INVALID_STATE");
// Free allocated storage
free(data);
free(data2);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_format_after_stickify_4dfeature_success() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
// sabotage ztensor with crap values
memset(&ztensor, 0xFF, sizeof(ztensor));
  // do all these steps bare-bones; the normal test cases already cover
  // verifying the statuses
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 4, 4, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE(ztensor.is_transformed == true,
"Expected is_transformed to be set to true, it is not.");
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_format_after_stickify_4dfeature_fail() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
// sabotage ztensor with crap values
memset(&ztensor, 0xFF, sizeof(ztensor));
  // do all these steps bare-bones; the normal test cases already cover
  // verifying the statuses
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 4, 4, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
// sabotage ztensor.pre_transformed_desc so it would fail
ztensor.pre_transformed_desc->type = ZDNN_DLFLOAT16;
zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE(ztensor.is_transformed == false,
"Expected is_transformed to be set to false, it is not.");
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_ztensor_null_buffer() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
zdnn_status status;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 4, 4, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
// Store buffer pointer before setting to NULL.
void *save_buffer = ztensor.buffer;
ztensor.buffer = NULL;
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_INVALID_BUFFER,
"zdnn_transform_ztensor() failed (status = %08x, expects = %08x)", status,
      ZDNN_INVALID_BUFFER);
// Free allocated storage
free(data);
// Reset buffer before freeing
ztensor.buffer = save_buffer;
zdnn_free_ztensor_buffer(&ztensor);
}
void test_ztensor_not_enough_buffersize() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
zdnn_status status;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 4, 1, 1, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
// (4, 1, 1, 1) needs 4 * 4096 bytes
ztensor.buffer_size = 4096;
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_INVALID_BUFFER,
"zdnn_transform_ztensor() failed (status = %08x, expects = %08x)", status,
      ZDNN_INVALID_BUFFER);
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
// This routine tests the conversion from FP16 to DLF
// Input: a "bad" value in FP16, which will "trip" the
// floating point exception trigger on VCNF
void test_ztensor_bad_value_FP16(uint16_t bad_value) {
#define INF_FP16_POS 0X7C00
#define INF_FP16_NEG 0xFC00
#define NAN_FP16_POS 0x7FFF
#define NAN_FP16_NEG 0xFFFF
#define STICK_ENTRIES_FP16 7
const uint32_t stick_entries_to_try[STICK_ENTRIES_FP16] = {0, 1, 7, 8,
9, 62, 63};
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
uint16_t *array; // Alternate view on data
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 1, 1, 64);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
array = (uint16_t *)data; /* use data as an INT array */
for (int i = 0; i < STICK_ENTRIES_FP16; i++) {
zdnn_status status;
array[stick_entries_to_try[i]] = bad_value;
ztensor.is_transformed = false; /* set false for next attempt, required
for underflow case */
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_CONVERT_FAILURE,
"zdnn_transform_ztensor() succeeded (status = %08x, expects = "
"%08x, i = %d, value = %04x)",
status, ZDNN_CONVERT_FAILURE, i, bad_value);
TEST_ASSERT_MESSAGE_FORMATTED(
ztensor.is_transformed == false,
"zdnn_transform_ztensor() set is_transformed (status = %08x, "
"expects = %08x, i = %d, value = %08x)",
status, ZDNN_CONVERT_FAILURE, i, bad_value);
array[stick_entries_to_try[i]] = 0; // set entry to 0 for next iteration
}
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_ztensor_fp16_bad_values() {
  test_ztensor_bad_value_FP16(
      INF_FP16_POS); // positive infinity, will cause overflow
  test_ztensor_bad_value_FP16(
      INF_FP16_NEG); // negative infinity, will cause overflow
  test_ztensor_bad_value_FP16(
      NAN_FP16_POS); // is not a number, will cause invalid op
  test_ztensor_bad_value_FP16(
      NAN_FP16_NEG); // is not a number, will cause invalid op
// Underflow not possible converting FP16 to DLF (VCNF)
}
// This routine tests the conversion from FP32 to DLFloat16
// Input: a "bad" value in FP32, which will "trip" the
// floating point exception trigger on VCRNF
// NOTE: Only Not-A-Number values will trip the exception.
void test_ztensor_bad_value_FP32(uint32_t bad_value) {
#define TOO_SMALL_FP32_POS 0x00000FF0
#define TOO_SMALL_FP32_NEG 0x80000FF0
#define TOO_LARGE_INF_FP32_POS 0x7F800000
#define TOO_LARGE_INF_FP32_NEG 0xFF800000
#define NAN_FP32_POS 0x7FFFFFFF
#define NAN_FP32_NEG 0xFFFFFFFF
#define STICK_ENTRIES_FP32 9
const uint32_t stick_entries_to_try[STICK_ENTRIES_FP32] = {0, 1, 3, 4, 7,
8, 9, 15, 63};
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
uint32_t *array;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 64);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
array = (uint32_t *)data; /* use data as an INT array */
for (int i = 0; i < STICK_ENTRIES_FP32; i++) {
zdnn_status status;
array[stick_entries_to_try[i]] = bad_value;
ztensor.is_transformed = false; /* set false for next attempt, required
for underflow case */
status = zdnn_transform_ztensor(&ztensor, data);
zdnn_status expected_status;
bool is_transformed_overflow_result, is_transformed_underflow_result;
    // Calculate STRIDE_N_SIZE for the tensor. When STRIDE_N_SIZE >
    // STICK_SW_THRESHOLD, hardware stickification is used; otherwise stay in
    // software stickification, as this shows the greatest performance benefit.
uint64_t STRIDE_N_SIZE =
((uint64_t)tfrmd_desc.dim3 * (uint64_t)tfrmd_desc.dim2 *
(uint64_t)tfrmd_desc.dim1);
// Check if hardware will handle the transformation
if ((zdnn_is_nnpa_function_installed(1, NNPA_TRANSFORM) == true) &&
(STRIDE_N_SIZE > STICK_SW_THRESHOLD)) {
expected_status = ZDNN_ELEMENT_RANGE_VIOLATION;
is_transformed_overflow_result = true;
is_transformed_underflow_result = true;
} else {
expected_status = ZDNN_CONVERT_FAILURE;
is_transformed_overflow_result = false;
is_transformed_underflow_result = true;
}
if (bad_value != TOO_SMALL_FP32_NEG &&
bad_value != TOO_SMALL_FP32_POS) { // if not underflow case
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"zdnn_transform_ztensor() with overflow succeeded (status = "
"%08x, expects = "
"%08x, i = %d, value = %08x)",
status, expected_status, i, bad_value);
TEST_ASSERT_MESSAGE_FORMATTED(
ztensor.is_transformed == is_transformed_overflow_result,
"zdnn_transform_ztensor() is_transformed overflow did not get "
"expected %s, (status = %08x, expects = %08x, i = %d, value = %08x)",
is_transformed_overflow_result ? "true" : "false", status,
expected_status, i, bad_value);
} else { // Must be underflow case
TEST_ASSERT_MESSAGE_FORMATTED(
status != expected_status,
"zdnn_transform_ztensor() with underflow did not succeed (status "
"= %08x, expects = "
"%08x, i = %04x, value = %08x)",
status, expected_status, i, bad_value);
TEST_ASSERT_MESSAGE_FORMATTED(
ztensor.is_transformed == is_transformed_underflow_result,
"zdnn_transform_ztensor() is_transformed underflow did not get "
"expected %s, (status = %08x, expects = %08x, i = %d, value = %08x))",
is_transformed_underflow_result ? "true" : "false", status,
expected_status, i, bad_value);
}
array[stick_entries_to_try[i]] = 0; // set entry to 0 for next iteration
}
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_ztensor_fp32_bad_values() {
  test_ztensor_bad_value_FP32(
      TOO_SMALL_FP32_POS); // non-zero that converts to 0, causes underflow
  test_ztensor_bad_value_FP32(
      TOO_SMALL_FP32_NEG); // non-zero that converts to 0, causes underflow
  test_ztensor_bad_value_FP32(
      TOO_LARGE_INF_FP32_POS); // positive infinity, will cause overflow
  test_ztensor_bad_value_FP32(
      TOO_LARGE_INF_FP32_NEG); // negative infinity, will cause overflow
test_ztensor_bad_value_FP32(
NAN_FP32_POS); // is not a number, will cause invalid op
test_ztensor_bad_value_FP32(
NAN_FP32_NEG); // is not a number, will cause invalid op
}
/**************************************************************
* HWCK
**************************************************************/
void test_hwck_1x4x4x1() { test_stickify(1, 4, 4, 1, ZDNN_HWCK); }
void test_hwck_1x2x3x4() { test_stickify(1, 2, 3, 4, ZDNN_HWCK); }
void test_hwck_2x3x33x129() { test_stickify(2, 3, 33, 129, ZDNN_HWCK); }
void test_hwck_1x32x32x3() { test_stickify(1, 32, 32, 3, ZDNN_HWCK); }
void test_hwck_1x1x32x63() { test_stickify(1, 1, 32, 63, ZDNN_HWCK); }
void test_hwck_1x1x31x64() { test_stickify(1, 1, 31, 64, ZDNN_HWCK); }
void test_hwck_1x1x32x64() { test_stickify(1, 1, 32, 64, ZDNN_HWCK); }
void test_hwck_1x1x33x64() { test_stickify(1, 1, 33, 64, ZDNN_HWCK); }
void test_hwck_1x1x32x65() { test_stickify(1, 1, 32, 65, ZDNN_HWCK); }
void test_hwck_1x1x4x127() { test_stickify(1, 1, 4, 127, ZDNN_HWCK); }
void test_hwck_1x1x4x128() { test_stickify(1, 1, 4, 128, ZDNN_HWCK); }
void test_hwck_1x1x4x129() { test_stickify(1, 1, 4, 129, ZDNN_HWCK); }
void test_hwck_1x1x63x4() { test_stickify(1, 1, 63, 4, ZDNN_HWCK); }
void test_hwck_1x1x64x4() { test_stickify(1, 1, 64, 4, ZDNN_HWCK); }
void test_hwck_1x1x65x4() { test_stickify(1, 1, 65, 4, ZDNN_HWCK); }
/**************************************************************
* NHWC 4DWEIGHT
**************************************************************/
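/*
 * Stickify random INT8 data as QUANTIZED_WEIGHTS_INT8, then verify each
 * pre-transformed element landed at the offset alloc_offsets() reports for
 * the stickified buffer.
 */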
void test_stickify_4dweight(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
int8_t *data;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, INT8, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
status = zdnn_generate_quantized_transformed_desc(
&pre_tfrmd_desc, QUANTIZED_WEIGHTS_INT8, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_quantized_transformed_desc() failed (status = %08x)",
status);
status = zdnn_init_quantized_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc,
0, 0, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_quantized_ztensor_with_malloc() failed (status = %08x)",
status);
data = create_and_fill_random_int8_data(&ztensor);
status =
zdnn_transform_quantized_ztensor(&ztensor, false, 0, 0, (void *)data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_quantized_ztensor() failed, status = %08x "
"(%s)",
status, zdnn_get_status_message(status));
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
size_t *offsets = alloc_offsets(&ztensor);
for (uint64_t i = 0; i < num_elements; i++) {
// value in stick area, int8
int8_t output_value = *(int8_t *)((uintptr_t)ztensor.buffer + offsets[i]);
TEST_ASSERT_MESSAGE_FORMATTED(output_value == data[i],
"Incorrect value at element %" PRIu64
" offset %" PRIu64 ": Stickified: "
"%d, Expected: %d",
i, offsets[i], output_value, data[i]);
}
// Free allocated storage
free(offsets);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
/**************************************************************
* NHWC INT8
**************************************************************/
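/*
 * Same flow as test_stickify_4dweight() above, but the transformed descriptor
 * is generated as QUANTIZED_INT8 instead of QUANTIZED_WEIGHTS_INT8.
 */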
void test_stickify_int8(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
int8_t *data;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, INT8, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
status = zdnn_generate_quantized_transformed_desc(
&pre_tfrmd_desc, QUANTIZED_INT8, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_quantized_transformed_desc() failed (status = %08x)",
status);
status = zdnn_init_quantized_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc,
0, 0, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_quantized_ztensor_with_malloc() failed (status = %08x)",
status);
data = create_and_fill_random_int8_data(&ztensor);
status =
zdnn_transform_quantized_ztensor(&ztensor, false, 0, 0, (void *)data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_quantized_ztensor() failed, status = %08x "
"(%s)",
status, zdnn_get_status_message(status));
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
size_t *offsets = alloc_offsets(&ztensor);
for (uint64_t i = 0; i < num_elements; i++) {
// value in stick area, int8
int8_t output_value = *(int8_t *)((uintptr_t)ztensor.buffer + offsets[i]);
TEST_ASSERT_MESSAGE_FORMATTED(output_value == data[i],
"Incorrect value at element %" PRIu64
" offset %" PRIu64 ": Stickified: "
"%d, Expected: %d",
i, offsets[i], output_value, data[i]);
}
// Free allocated storage
free(offsets);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_4dweight_1x4x4x1() { test_stickify_4dweight(1, 4, 4, 1); }
void test_4dweight_1x2x3x4() { test_stickify_4dweight(1, 2, 3, 4); }
void test_4dweight_1x1x1x63() { test_stickify_4dweight(1, 1, 1, 63); }
void test_4dweight_1x1x1x64() { test_stickify_4dweight(1, 1, 1, 64); }
void test_4dweight_1x1x1x65() { test_stickify_4dweight(1, 1, 1, 65); }
void test_4dweight_2x2x3x4() { test_stickify_4dweight(2, 2, 3, 4); }
void test_4dweight_2x2x4x4() { test_stickify_4dweight(2, 2, 4, 4); }
void test_4dweight_2x2x4x63() { test_stickify_4dweight(2, 2, 4, 63); }
void test_4dweight_2x2x4x64() { test_stickify_4dweight(2, 2, 4, 64); }
void test_4dweight_2x2x4x65() { test_stickify_4dweight(2, 2, 4, 65); }
void test_4dweight_2x2x31x4() { test_stickify_4dweight(2, 2, 31, 4); }
void test_4dweight_2x2x32x4() { test_stickify_4dweight(2, 2, 32, 4); }
void test_4dweight_2x2x33x4() { test_stickify_4dweight(2, 2, 33, 4); }
void test_4dweight_3x3x4x127() { test_stickify_4dweight(3, 3, 4, 127); }
void test_4dweight_3x3x4x128() { test_stickify_4dweight(3, 3, 4, 128); }
void test_4dweight_3x3x4x129() { test_stickify_4dweight(3, 3, 4, 129); }
void test_4dweight_4x3x63x10() { test_stickify_4dweight(4, 3, 63, 10); }
void test_4dweight_4x3x64x10() { test_stickify_4dweight(4, 3, 64, 10); }
void test_4dweight_4x3x65x10() { test_stickify_4dweight(4, 3, 65, 10); }
void test_4dweight_2x3x33x129() { test_stickify_4dweight(2, 3, 33, 129); }
void test_int8_1x4x4x1() { test_stickify_int8(1, 4, 4, 1); }
void test_int8_1x2x3x4() { test_stickify_int8(1, 2, 3, 4); }
void test_int8_1x1x1x63() { test_stickify_int8(1, 1, 1, 63); }
void test_int8_1x1x1x64() { test_stickify_int8(1, 1, 1, 64); }
void test_int8_1x1x1x65() { test_stickify_int8(1, 1, 1, 65); }
void test_int8_2x2x3x4() { test_stickify_int8(2, 2, 3, 4); }
void test_int8_2x2x4x4() { test_stickify_int8(2, 2, 4, 4); }
void test_int8_2x2x4x63() { test_stickify_int8(2, 2, 4, 63); }
void test_int8_2x2x4x64() { test_stickify_int8(2, 2, 4, 64); }
void test_int8_2x2x4x65() { test_stickify_int8(2, 2, 4, 65); }
void test_int8_2x2x31x4() { test_stickify_int8(2, 2, 31, 4); }
void test_int8_2x2x32x4() { test_stickify_int8(2, 2, 32, 4); }
void test_int8_2x2x33x4() { test_stickify_int8(2, 2, 33, 4); }
void test_int8_3x3x4x127() { test_stickify_int8(3, 3, 4, 127); }
void test_int8_3x3x4x128() { test_stickify_int8(3, 3, 4, 128); }
void test_int8_3x3x4x129() { test_stickify_int8(3, 3, 4, 129); }
void test_int8_4x3x63x10() { test_stickify_int8(4, 3, 63, 10); }
void test_int8_4x3x64x10() { test_stickify_int8(4, 3, 64, 10); }
void test_int8_4x3x65x10() { test_stickify_int8(4, 3, 65, 10); }
void test_int8_2x3x33x129() { test_stickify_int8(2, 3, 33, 129); }
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x9);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x2x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x31x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x33x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x32x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x63x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x64x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x65x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3ds_4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3ds_32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2ds_4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2ds_2x2049);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_biases_2x2049);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_1x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_2x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_no_vconcat_weights_2x33x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_1x6x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x6x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_lstm_prev_bidir_weights_2x66x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_biases_2x2049);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_1x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_2x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_no_vconcat_weights_2x33x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_1x6x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x6x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_gru_prev_bidir_weights_2x66x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_lstm_no_vconcat_weights_odd_dim2_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_lstm_prev_bidir_weights_odd_dim2_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_gru_no_vconcat_weights_odd_dim2_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_gru_prev_bidir_weights_odd_dim2_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x1x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x2x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x3x32x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_2x129x3x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x63x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x31);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x65x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x127x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x128x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x129x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_nchw_comp_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_nchw_comp_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_nchw_comp_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x2x3x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x31x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x33x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x32x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x4x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x63x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x64x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_hwck_1x1x65x4);
RUN_TEST(test_4dweight_1x4x4x1);
RUN_TEST(test_4dweight_1x2x3x4);
RUN_TEST(test_4dweight_1x1x1x63);
RUN_TEST(test_4dweight_1x1x1x64);
RUN_TEST(test_4dweight_1x1x1x65);
RUN_TEST(test_4dweight_2x2x3x4);
RUN_TEST(test_4dweight_2x2x4x4);
RUN_TEST(test_4dweight_2x2x4x63);
RUN_TEST(test_4dweight_2x2x4x64);
RUN_TEST(test_4dweight_2x2x4x65);
RUN_TEST(test_4dweight_2x2x31x4);
RUN_TEST(test_4dweight_2x2x32x4);
RUN_TEST(test_4dweight_2x2x33x4);
RUN_TEST(test_4dweight_3x3x4x127);
RUN_TEST(test_4dweight_3x3x4x128);
RUN_TEST(test_4dweight_3x3x4x129);
RUN_TEST(test_4dweight_4x3x63x10);
RUN_TEST(test_4dweight_4x3x64x10);
RUN_TEST(test_4dweight_4x3x65x10);
RUN_TEST(test_4dweight_2x3x33x129);
RUN_TEST(test_int8_1x4x4x1);
RUN_TEST(test_int8_1x2x3x4);
RUN_TEST(test_int8_1x1x1x63);
RUN_TEST(test_int8_1x1x1x64);
RUN_TEST(test_int8_1x1x1x65);
RUN_TEST(test_int8_2x2x3x4);
RUN_TEST(test_int8_2x2x4x4);
RUN_TEST(test_int8_2x2x4x63);
RUN_TEST(test_int8_2x2x4x64);
RUN_TEST(test_int8_2x2x4x65);
RUN_TEST(test_int8_2x2x31x4);
RUN_TEST(test_int8_2x2x32x4);
RUN_TEST(test_int8_2x2x33x4);
RUN_TEST(test_int8_3x3x4x127);
RUN_TEST(test_int8_3x3x4x128);
RUN_TEST(test_int8_3x3x4x129);
RUN_TEST(test_int8_4x3x63x10);
RUN_TEST(test_int8_4x3x64x10);
RUN_TEST(test_int8_4x3x65x10);
RUN_TEST(test_int8_2x3x33x129);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_ztensor_reuse_with_reset);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_ztensor_reuse_without_reset);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_format_after_stickify_4dfeature_success);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_format_after_stickify_4dfeature_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_ztensor_null_buffer);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_ztensor_not_enough_buffersize);
RUN_TEST(test_ztensor_fp16_bad_values);
RUN_TEST(test_ztensor_fp32_bad_values);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_stickify_saturation.c 0000664 0000000 0000000 00000046373 15000221702 0022145 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2024, 2025
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <float.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "testsupport.h"
#define INF_FP16_POS 0X7C00
#define INF_FP16_NEG 0xFC00
#define NAN_FP16_POS 0x7FFF
#define NAN_FP16_NEG 0xFFFF
#define INF_FP32_POS \
(((union { \
int i; \
float f; \
}){0x7F800000}) \
.f)
#define INF_FP32_NEG \
(((union { \
int i; \
float f; \
}){0xFF800000}) \
.f)
#define NAN_FP32_POS \
(((union { \
int i; \
float f; \
}){0x7FFFFFFF}) \
.f)
#define NAN_FP32_NEG \
(((union { \
int i; \
float f; \
}){0xFFFFFFFF}) \
.f)
zdnn_status default_unstick_expected_status = ZDNN_OK;
zdnn_status default_saturate_expected_status = ZDNN_OK;
void setUp(void) {}
void tearDown(void) {}
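// Clamp an FP32 value to the DLFLOAT16-representable range; NaN and +/-Inf
// are expected to saturate to NaN.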
float fp32_saturation_value(float value) {
// Expected Saturation value for -NAN,NAN,-INF,INF should be NAN
if (isnan(value) || isinf(value))
return NAN;
float tmp = (value > DLF16_MAX_AS_FP32) ? DLF16_MAX_AS_FP32 : value;
return (tmp < DLF16_MIN_AS_FP32) ? DLF16_MIN_AS_FP32 : tmp;
}
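// Compute the expected saturated BFLOAT value. BFLOAT is the high-order 16
// bits of an FP32, so (with the big-endian layout used on IBM Z, where the
// `left` half-word holds the high-order bits) the value is widened to FP32 by
// zeroing the low half-word, then compared against the DLFLOAT16 range.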
uint16_t bfloat_saturation_value(uint16_t value) {
typedef struct float_as_uint16s {
uint16_t left;
uint16_t right;
} float_as_uint16s;
union {
float f;
float_as_uint16s fau;
} tmp;
tmp.fau.left = value;
tmp.fau.right = 0;
if (tmp.f > DLF16_MAX_AS_FP32) {
return DLF16_MAX_AS_BFLOAT;
} else if (tmp.f < DLF16_MIN_AS_FP32) {
return DLF16_MIN_AS_BFLOAT;
} else {
return value;
}
}
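/*
 * Core saturation test driver: fill a tensor of the given layout/type/shape
 * with a single value, stickify it with
 * zdnn_transform_ztensor_with_saturation(), unstickify it again, and compare
 * every element against the expected saturated value. Both the stickify and
 * unstickify statuses are checked against the caller-supplied expectations.
 */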
void test_stickify_with_saturation_dims(zdnn_data_layouts layout,
zdnn_data_types type, void *value,
uint32_t dim4, uint32_t dim3,
uint32_t dim2, uint32_t dim1,
zdnn_status saturation_expected_status,
zdnn_status unstick_expected_status) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status stick_status, unstick_status;
switch (layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(layout, type, &pre_tfrmd_desc, dim1);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(layout, type, &pre_tfrmd_desc, dim2, dim1);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(layout, type, &pre_tfrmd_desc, dim3, dim2,
dim1);
break;
case (ZDNN_ZRH):
case (ZDNN_FICO):
case (ZDNN_BIDIR_ZRH):
case (ZDNN_BIDIR_FICO):
zdnn_init_pre_transformed_desc(ZDNN_NHWC, type, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
break;
default:
zdnn_init_pre_transformed_desc(layout, type, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
}
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_AIU);
  uint64_t element_size = (type == FP32) ? 4 : 2; // FP32 = 4 bytes, FP16/BFLOAT = 2 bytes
void *in_data = malloc(num_elements * element_size);
void *saturated_data = malloc(num_elements * element_size);
void *out_data = malloc(num_elements * element_size);
// Check if any allocations failed.
if (in_data == NULL || saturated_data == NULL || out_data == NULL) {
free(in_data);
free(saturated_data);
free(out_data);
TEST_FAIL_MESSAGE("Unable to allocate required data");
}
for (uint64_t i = 0; i < num_elements; i++) {
if (type == FP32) {
((float *)(in_data))[i] = *((float *)(value));
((float *)(saturated_data))[i] =
fp32_saturation_value(*((float *)(value)));
} else {
((uint16_t *)(in_data))[i] = *((uint16_t *)(value));
((uint16_t *)(saturated_data))[i] =
(type == BFLOAT) ? bfloat_saturation_value(*((uint16_t *)(value)))
: *((uint16_t *)(value));
}
}
stick_status = zdnn_transform_ztensor_with_saturation(&ztensor, in_data);
  // HWCK can't be unstickified directly. Since only 4 elements are passed,
  // override the format and layouts so unstickifying works.
if (layout == ZDNN_HWCK) {
ztensor.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
ztensor.transformed_desc->layout = ZDNN_NHWC;
ztensor.pre_transformed_desc->layout = ZDNN_NHWC;
}
bool values_match = true;
unstick_status = zdnn_transform_origtensor(&ztensor, out_data);
// no need to check output if is_transformed set to false
if (ztensor.is_transformed == true) {
for (uint64_t i = 0; i < num_elements; i++) {
if (type == FP32) {
// check if out values and saturated data are not equal but only if the
// values are BOTH not NAN.
if ((((float *)(out_data))[i]) != (((float *)(saturated_data))[i]) &&
!isnan((((float *)(out_data))[i])) &&
!isnan((((float *)(saturated_data))[i]))) {
values_match = false;
printf("Index: %" PRId64 " fp32 value: %f not saturated properly. "
"Expected %f, input was: %f\n",
i, ((float *)(out_data))[i], ((float *)(saturated_data))[i],
((float *)(in_data))[i]);
}
} else {
if ((((uint16_t *)(out_data))[i]) !=
(((uint16_t *)(saturated_data))[i])) {
values_match = false;
printf("Index: %" PRId64 " bfloat value: %hu not saturated properly. "
"Expected %hu, input was: %hu\n",
i, ((uint16_t *)(out_data))[i],
((uint16_t *)(saturated_data))[i], ((uint16_t *)(in_data))[i]);
}
}
}
}
free(in_data);
free(saturated_data);
free(out_data);
zdnn_free_ztensor_buffer(&ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(stick_status == saturation_expected_status,
"zdnn_transform_ztensor_with_saturation() "
"failed (status = %08x, expects = %08x)",
stick_status, saturation_expected_status);
TEST_ASSERT_MESSAGE_FORMATTED(unstick_status == unstick_expected_status,
"zdnn_transform_origtensor() "
"failed (status = %08x, expects = %08x)",
unstick_status, unstick_expected_status);
  // When stick status is ZDNN_CONVERT_FAILURE (FP16 NaN/Inf), skip the value
  // assertion because ztensor.is_transformed is false
if (stick_status != ZDNN_CONVERT_FAILURE) {
TEST_ASSERT_MESSAGE(values_match == true,
"values aren't saturated properly.");
}
}
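// The helpers below are thin wrappers around
// test_stickify_with_saturation_dims() that pin the data type (and, except
// for the fp32 variant, a 1x1x1x4 shape).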
void test_stickify_with_saturation_float(zdnn_data_layouts layout, float value,
zdnn_status saturation_expected_status,
zdnn_status unstick_expected_status) {
test_stickify_with_saturation_dims(layout, FP32, (void *)&value, 1, 1, 1, 4,
saturation_expected_status,
unstick_expected_status);
}
void test_stickify_with_saturation_fp16(zdnn_data_layouts layout,
uint16_t value,
zdnn_status saturation_expected_status,
zdnn_status unstick_expected_status) {
test_stickify_with_saturation_dims(layout, FP16, (void *)&value, 1, 1, 1, 4,
saturation_expected_status,
unstick_expected_status);
}
void test_stickify_with_saturation_fp32(zdnn_data_layouts layout, float value,
uint32_t dim4, uint32_t dim3,
uint32_t dim2, uint32_t dim1,
zdnn_status saturation_expected_status,
zdnn_status unstick_expected_status) {
test_stickify_with_saturation_dims(layout, FP32, (void *)&value, dim4, dim3,
dim2, dim1, saturation_expected_status,
unstick_expected_status);
}
void test_stickify_with_saturation_bfloat(
zdnn_data_layouts layout, float value,
zdnn_status saturation_expected_status,
zdnn_status unstick_expected_status) {
uint16_t bfloat_value = cnvt_1_fp32_to_bfloat(value);
test_stickify_with_saturation_dims(layout, BFLOAT, (void *)&bfloat_value, 1,
1, 1, 4, saturation_expected_status,
unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; drive all acceptable layouts for FP32
// Expect ZDNN_OK
void saturation_basic() {
zdnn_data_layouts layouts[] = {ZDNN_1D, ZDNN_2D, ZDNN_2DS, ZDNN_3D,
ZDNN_3DS, ZDNN_4D, ZDNN_NHWC};
for (int i = 0; i < (sizeof(layouts) / sizeof(layouts[0])); i++) {
test_stickify_with_saturation_float(layouts[i], 100,
default_saturate_expected_status,
default_unstick_expected_status);
}
}
void saturation_basic_small() {
zdnn_data_layouts layouts[] = {ZDNN_1D, ZDNN_2D, ZDNN_2DS, ZDNN_3D,
ZDNN_3DS, ZDNN_4D, ZDNN_NHWC};
for (int i = 0; i < (sizeof(layouts) / sizeof(layouts[0])); i++) {
test_stickify_with_saturation_float(layouts[i], 0.5,
default_saturate_expected_status,
default_unstick_expected_status);
}
}
void saturation_basic_hwck() {
zdnn_data_layouts layouts[] = {ZDNN_HWCK};
for (int i = 0; i < (sizeof(layouts) / sizeof(layouts[0])); i++) {
test_stickify_with_saturation_float(layouts[i], 100,
default_saturate_expected_status,
default_unstick_expected_status);
}
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive max DLFLOAT value.
// Expect ZDNN_OK
void saturation_basic_match_max() {
test_stickify_with_saturation_float(ZDNN_NHWC, DLF16_MAX_AS_FP32,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive max FP32 value.
// Expect ZDNN_OK
void saturation_basic_exceed_max() {
test_stickify_with_saturation_float(ZDNN_NHWC, FLT_MAX,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive min DLFLOAT value.
// Expect ZDNN_OK
void saturation_basic_match_min() {
test_stickify_with_saturation_float(ZDNN_NHWC, DLF16_MIN_AS_FP32,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive min FP32 value.
// Expect ZDNN_OK
void saturation_basic_exceed_min() {
test_stickify_with_saturation_float(ZDNN_NHWC, -FLT_MAX,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; drive all acceptable layouts for bfloat
// Expect ZDNN_OK
void saturation_basic_bfloat() {
zdnn_data_layouts layouts[] = {ZDNN_1D, ZDNN_2D, ZDNN_2DS, ZDNN_3D,
ZDNN_3DS, ZDNN_4D, ZDNN_NHWC};
for (int i = 0; i < (sizeof(layouts) / sizeof(layouts[0])); i++) {
test_stickify_with_saturation_bfloat(layouts[i], 100,
default_saturate_expected_status,
default_unstick_expected_status);
}
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive max DLFLOAT value.
// Expect ZDNN_OK
void saturation_basic_match_max_bfloat() {
test_stickify_with_saturation_bfloat(ZDNN_NHWC, DLF16_MAX_AS_FP32,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive max bfloat value.
// Expect ZDNN_OK
void saturation_basic_exceed_max_bfloat() {
test_stickify_with_saturation_bfloat(ZDNN_NHWC, FLT_MAX,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive min dlfloat value.
// Expect ZDNN_OK
void saturation_basic_match_min_bfloat() {
test_stickify_with_saturation_bfloat(ZDNN_NHWC, DLF16_MIN_AS_FP32,
default_saturate_expected_status,
default_unstick_expected_status);
}
// Basic zdnn_transform_ztensor_with_saturation test.
// No errors; Drive min bfloat value.
// Expect ZDNN_OK
void saturation_basic_exceed_min_bfloat() {
test_stickify_with_saturation_bfloat(ZDNN_NHWC, -FLT_MAX,
default_saturate_expected_status,
default_unstick_expected_status);
}
// FP32 NAN
void saturation_basic_fp32_nan() {
// stickification status is always the same for hw/sw
zdnn_status saturation_expected_status = ZDNN_ELEMENT_RANGE_VIOLATION;
// Test set #1
// Small tensor to stay under STICK_SW_THRESHOLD to exercise correct unstick
// status
uint32_t dim4 = 1;
uint32_t dim3 = 1;
uint32_t dim2 = 1;
uint32_t dim1 = 4;
  // The following tests always stay in SW (i.e., never go to the AIU) because
  // the product of the pre-transformed dims [1..3] will be <
  // STICK_SW_THRESHOLD, so we expect ZDNN_CONVERT_FAILURE for unstick
// see: n_stride_meets_hardware_limit
zdnn_status expected_unstick_status = ZDNN_CONVERT_FAILURE;
test_stickify_with_saturation_fp32(ZDNN_NHWC, INF_FP32_POS, dim4, dim3, dim2,
dim1, ZDNN_ELEMENT_RANGE_VIOLATION,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, INF_FP32_NEG, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, NAN_FP32_NEG, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, NAN_FP32_POS, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
// Test set #2
// Larger tensor to go over the STICK_SW_THRESHOLD to exercise correct unstick
  // status. When NNPA_TRANSFORM is installed the (un)stickification is done on
  // HW, so expect ZDNN_ELEMENT_RANGE_VIOLATION for unstick. When NNPA_TRANSFORM
  // is not installed, expect ZDNN_CONVERT_FAILURE for unstick as it is done in SW
dim4 = 1;
dim3 = 1;
dim2 = 1;
dim1 = 4096;
if (zdnn_is_nnpa_function_installed(1, NNPA_TRANSFORM) == true) {
expected_unstick_status = ZDNN_ELEMENT_RANGE_VIOLATION;
} else {
expected_unstick_status = ZDNN_CONVERT_FAILURE;
}
test_stickify_with_saturation_fp32(ZDNN_NHWC, INF_FP32_POS, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, INF_FP32_NEG, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, NAN_FP32_NEG, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
test_stickify_with_saturation_fp32(ZDNN_NHWC, NAN_FP32_POS, dim4, dim3, dim2,
dim1, saturation_expected_status,
expected_unstick_status);
}
// FP16 NAN
// Expect: ZDNN_CONVERT_FAILURE
void saturation_basic_fp16_nan() {
test_stickify_with_saturation_fp16(ZDNN_NHWC, INF_FP16_NEG,
ZDNN_CONVERT_FAILURE, ZDNN_INVALID_STATE);
test_stickify_with_saturation_fp16(ZDNN_NHWC, INF_FP16_POS,
ZDNN_CONVERT_FAILURE, ZDNN_INVALID_STATE);
test_stickify_with_saturation_fp16(ZDNN_NHWC, NAN_FP16_POS,
ZDNN_CONVERT_FAILURE, ZDNN_INVALID_STATE);
test_stickify_with_saturation_fp16(ZDNN_NHWC, NAN_FP16_NEG,
ZDNN_CONVERT_FAILURE, ZDNN_INVALID_STATE);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(saturation_basic);
RUN_TEST(saturation_basic_small);
RUN_TEST(saturation_basic_hwck);
RUN_TEST(saturation_basic_match_max);
RUN_TEST(saturation_basic_exceed_max);
RUN_TEST(saturation_basic_match_min);
RUN_TEST(saturation_basic_exceed_min);
RUN_TEST(saturation_basic_bfloat);
RUN_TEST(saturation_basic_match_max_bfloat);
RUN_TEST(saturation_basic_exceed_max_bfloat);
RUN_TEST(saturation_basic_match_min_bfloat);
RUN_TEST(saturation_basic_exceed_min_bfloat);
RUN_TEST(saturation_basic_fp32_nan);
RUN_TEST(saturation_basic_fp16_nan);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_desc.c 0000664 0000000 0000000 00000053162 15000221702 0020351 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
// convenience routine for init and verify (transformed)
void set_and_verify_transformed_descriptor(
uint32_t dims[], zdnn_data_layouts layout, zdnn_data_types type,
zdnn_data_formats format, zdnn_status exp_status, char *error_msg) {
zdnn_status status;
zdnn_tensor_desc tfrmd_desc;
init_transformed_desc(layout, type, format, &tfrmd_desc, dims[0], dims[1],
dims[2], dims[3]);
status = verify_transformed_descriptor(&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status, "%s (%08x)", error_msg,
status);
}
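// Exercise the per-dimension index limit checks: a zero dimension and a
// dimension above zdnn_get_max_for_dim() must be rejected with
// ZDNN_INVALID_SHAPE, while values at or just below the limit must pass.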
void verify_dims() {
uint32_t zero_dim[ZDNN_MAX_DIMS] = {0, 1, 1, 1};
uint32_t limit_minus1[ZDNN_MAX_DIMS] = {1, zdnn_get_max_for_dim(3) - 1, 1, 1};
uint32_t at_limit[ZDNN_MAX_DIMS] = {1, 1, zdnn_get_max_for_dim(2), 1};
uint32_t limit_plus1[ZDNN_MAX_DIMS] = {1, 1, zdnn_get_max_for_dim(2) + 1, 1};
set_and_verify_transformed_descriptor(
zero_dim, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_SHAPE, "Not returning ZDNN_INVALID_SHAPE for 0 dim tensor");
set_and_verify_transformed_descriptor(
limit_minus1, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE, ZDNN_OK,
"Not returning ZDNN_OK for below dims limit tensor");
set_and_verify_transformed_descriptor(
at_limit, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE, ZDNN_OK,
"Not returning ZDNN_OK for at dims limit tensor");
set_and_verify_transformed_descriptor(
limit_plus1, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_SHAPE,
"Not returning ZDNN_INVALID_SHAPE for above dims limit tensor");
}
void verify_max_tensor_size() {
uint32_t max_dim_size = zdnn_get_nnpa_max_dim_idx_size();
// try to come up with dim3 so that (1, dim3, max_dim_size, max_dim_size)
// would sit right at the MAX TENSOR SIZE limit
uint32_t dim3 =
zdnn_get_nnpa_max_tensor_size() / (max_dim_size / AIU_STICKS_PER_PAGE) /
(max_dim_size / AIU_2BYTE_CELLS_PER_STICK) / AIU_PAGESIZE_IN_BYTES;
unsigned int limit_minus1[ZDNN_MAX_DIMS] = {1, dim3, max_dim_size - 1,
max_dim_size};
unsigned int at_limit[ZDNN_MAX_DIMS] = {1, dim3, max_dim_size, max_dim_size};
unsigned int limit_plus1[ZDNN_MAX_DIMS] = {1, dim3, max_dim_size + 1,
max_dim_size};
set_and_verify_transformed_descriptor(
limit_minus1, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE, ZDNN_OK,
"Not returning ZDNN_OK for below tensor size limit tensor");
set_and_verify_transformed_descriptor(
at_limit, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE, ZDNN_OK,
"Not returning ZDNN_OK for at tensor size limit tensor");
set_and_verify_transformed_descriptor(
limit_plus1, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_SHAPE,
"Not returning ZDNN_INVALID_SHAPE for above tensor size limit tensor");
}
void verify_datatype_tranformed() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_TYPE,
"Not returning ZDNN_INVALID_TYPE with ZDNN_NHWC");
}
void verify_generated_format() {
zdnn_tensor_desc pre_tfrmd_feature_desc, tfrmd_feature_desc;
zdnn_tensor_desc pre_tfrmd_kernel_desc, tfrmd_kernel_desc;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype,
&pre_tfrmd_feature_desc, 1, 1, 1, 1);
zdnn_init_pre_transformed_desc(ZDNN_HWCK, test_datatype,
&pre_tfrmd_kernel_desc, 1, 1, 1, 1);
zdnn_generate_transformed_desc(&pre_tfrmd_feature_desc, &tfrmd_feature_desc);
zdnn_generate_transformed_desc(&pre_tfrmd_kernel_desc, &tfrmd_kernel_desc);
TEST_ASSERT_MESSAGE(tfrmd_feature_desc.format == ZDNN_FORMAT_4DFEATURE,
"tfrmd_feature_desc doesn't have correct format set");
TEST_ASSERT_MESSAGE(tfrmd_kernel_desc.format == ZDNN_FORMAT_4DKERNEL,
"tfrmd_kernel_desc doesn't have correct format set");
}
#define BAD_FORMAT 255
#define BAD_LAYOUT 255
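// 255 does not correspond to any defined zdnn_data_formats / zdnn_data_layouts
// value, so it drives the undefined-format/-layout error paths below.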
void format_undefined_fail() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, ZDNN_NHWC, test_datatype, BAD_FORMAT, ZDNN_INVALID_FORMAT,
"BAD_FORMAT doesn't yield ZDNN_INVALID_FORMAT");
}
void format_feature_layout_notagree_fail() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, ZDNN_HWCK, test_datatype, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_LAYOUT,
"ZDNN_FORMAT_4DFEATURE + ZDNN_HWCK doesn't yield ZDNN_INVALID_LAYOUT");
}
void format_kernel_layout_notagree_fail() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, ZDNN_NHWC, test_datatype, ZDNN_FORMAT_4DKERNEL, ZDNN_INVALID_LAYOUT,
"ZDNN_FORMAT_4DKERNEL + ZDNN_NHWC doesn't yield ZDNN_INVALID_LAYOUT");
}
void format_feature_layout_undefined_fail() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, BAD_LAYOUT, test_datatype, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_LAYOUT,
"ZDNN_FORMAT_4DFEATURE + undefined layout doesn't yield "
"ZDNN_INVALID_LAYOUT");
}
void format_kernel_layout_undefined_fail() {
uint32_t dims[ZDNN_MAX_DIMS] = {1, 1, 1, 1};
set_and_verify_transformed_descriptor(
dims, BAD_LAYOUT, test_datatype, ZDNN_FORMAT_4DKERNEL,
ZDNN_INVALID_LAYOUT,
"ZDNN_FORMAT_4DKERNEL + undefined layout doesn't yield "
"ZDNN_INVALID_LAYOUT");
}
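/*
 * Slice an input ztensor along dim4 into num_slices pieces via
 * ztensor_slice_dim4() and, on expected success, verify each slice's
 * buffer_size and values. Also verify the input ztensor and its descriptors
 * are left untouched by the slicing.
 */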
void verify_ztensor_slicing(uint32_t num_slices, uint32_t *shape,
zdnn_data_layouts layout, size_t buffer_size,
zdnn_status exp_status) {
uint64_t num_elements;
switch (layout) {
// 1D isn't valid as it has no dim4. Used for negative test case.
case (ZDNN_1D):
num_elements = shape[0];
break;
case (ZDNN_2DS):
num_elements = shape[0] * shape[1];
break;
case (ZDNN_3DS):
num_elements = shape[0] * shape[1] * shape[2];
break;
case (ZDNN_4D):
case (ZDNN_NHWC):
case (ZDNN_NCHW):
num_elements = shape[0] * shape[1] * shape[2] * shape[3];
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"layout. Could you teach me?",
get_data_layout_str(layout));
break;
}
uint64_t num_slice_elements = num_elements / num_slices;
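  // Slicing along dim4 splits the data evenly: each slice is expected to hold
  // num_slice_elements consecutive values from the original values[] array,
  // which is what assert_ztensor_values() compares against further below.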
float values[num_elements];
gen_random_float_array(num_elements, values);
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, values);
// Print out the sliced ztensor
BEGIN_BLOCK_IF_LOGLEVEL_TRACE {
printf("%s() with type %s: dumpdata_ztensor of unsliced input\n", __func__,
get_data_type_str(test_datatype));
dumpdata_ztensor(input_ztensor, AS_FLOAT, false);
}
// Make copies of the original input to confirm it isn't altered later.
zdnn_ztensor copy_input_ztensor;
zdnn_tensor_desc copy_pre_trfmd_desc;
zdnn_tensor_desc copy_trfmd_desc;
  memcpy(&copy_input_ztensor, input_ztensor, sizeof(zdnn_ztensor));
  memcpy(&copy_pre_trfmd_desc, input_ztensor->pre_transformed_desc,
         sizeof(zdnn_tensor_desc));
  memcpy(&copy_trfmd_desc, input_ztensor->transformed_desc,
         sizeof(zdnn_tensor_desc));
// Create output structs
zdnn_tensor_desc output_pre_tfrmd_desc[num_slices];
zdnn_tensor_desc output_tfrmd_desc[num_slices];
zdnn_ztensor output_ztensors[num_slices];
  // Slice the input and, if we expect it to succeed, check that the values in
  // each slice match the expected values for that slice.
for (uint32_t slice = 0; slice < num_slices; slice++) {
zdnn_status status = ztensor_slice_dim4(
input_ztensor, slice, buffer_size, &output_pre_tfrmd_desc[slice],
&output_tfrmd_desc[slice], &output_ztensors[slice]);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"ztensor_slice_dim4() on slice %u failed, "
"status = %08x (%s)",
slice, status,
zdnn_get_status_message(status));
// Only test that output values are valid in positive test cases
if (exp_status == ZDNN_OK) {
// Print out the sliced ztensor
BEGIN_BLOCK_IF_LOGLEVEL_TRACE {
printf("%s() with type %s: dumpdata_ztensor of slice %u\n", __func__,
get_data_type_str(test_datatype), slice);
dumpdata_ztensor(&output_ztensors[slice], AS_FLOAT, false);
}
// Check output buffer_size matches the specified value or calculated
// value if a size wasn't specified.
size_t expected_buffer_size;
if (buffer_size) {
expected_buffer_size = buffer_size;
} else {
expected_buffer_size =
zdnn_getsize_ztensor(input_ztensor->transformed_desc) / num_slices;
}
TEST_ASSERT_MESSAGE_FORMATTED(
expected_buffer_size == output_ztensors[slice].buffer_size,
"expected sliced buffer_size to be %" PRIu64 " but found %" PRIu64,
expected_buffer_size, output_ztensors[slice].buffer_size);
// Check that slice's values match the expected portion of the input
assert_ztensor_values(&output_ztensors[slice], false,
&values[slice * num_slice_elements]);
}
}
// Confirm input structs weren't altered during slicing
TEST_ASSERT_MESSAGE(
      memcmp(input_ztensor, &copy_input_ztensor, sizeof(zdnn_ztensor)) == 0,
"input_ztensor was unexpectedly altered");
TEST_ASSERT_MESSAGE(
      memcmp(input_ztensor->pre_transformed_desc, &copy_pre_trfmd_desc,
sizeof(zdnn_tensor_desc)) == 0,
"input_ztensor->pre_transformed_desc was unexpectedly altered");
TEST_ASSERT_MESSAGE(
      memcmp(input_ztensor->transformed_desc, &copy_trfmd_desc,
sizeof(zdnn_tensor_desc)) == 0,
"input_ztensor->transformed_desc was unexpectedly altered");
// Cleanup allocations
free(input_ztensor);
}
void test_slicing_specified_buffer() {
uint32_t num_slices = 5;
uint32_t shape[] = {num_slices, 2049};
size_t specified_buffer = 135168;
verify_ztensor_slicing(num_slices, shape, ZDNN_2DS, specified_buffer,
ZDNN_OK);
}
void test_slicing_fail_input_has_only_one_dim4() {
uint32_t num_slices = 1;
uint32_t shape[] = {num_slices, 2049};
verify_ztensor_slicing(num_slices, shape, ZDNN_2DS, 0, ZDNN_INVALID_SHAPE);
}
void test_slicing_fail_too_many_slices() {
uint32_t num_slices = 2;
uint32_t shape[] = {num_slices, 2049};
// Create input ztensor
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, ZDNN_2DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
// Create output structs
zdnn_tensor_desc output_pre_tfrmd_desc;
zdnn_tensor_desc output_tfrmd_desc;
zdnn_ztensor output_ztensors;
// idx is 0 indexed so this should fail because it's too large
uint32_t slice_idx = num_slices;
// Confirm expected failure status
zdnn_status status =
ztensor_slice_dim4(input_ztensor, slice_idx, 0, &output_pre_tfrmd_desc,
&output_tfrmd_desc, &output_ztensors);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_INVALID_SHAPE,
"ztensor_slice_dim4() on slice_idx %u failed, status = %08x (%s)",
slice_idx, status, zdnn_get_status_message(status));
}
void test_slicing_1D_fail() {
uint32_t num_slices = 2;
uint32_t shape[] = {num_slices};
verify_ztensor_slicing(num_slices, shape, ZDNN_1D, 0, ZDNN_INVALID_LAYOUT);
}
void test_slicing_2DS_5x2049() {
uint32_t num_slices = 5;
uint32_t shape[] = {num_slices, 2049};
verify_ztensor_slicing(num_slices, shape, ZDNN_2DS, 0, ZDNN_OK);
}
void test_slicing_3DS_5x33x65() {
uint32_t num_slices = 5;
uint32_t shape[] = {num_slices, 33, 65};
verify_ztensor_slicing(num_slices, shape, ZDNN_3DS, 0, ZDNN_OK);
}
void verify_transformed_layout(zdnn_data_layouts from_layout, bool is_concat,
uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, zdnn_data_layouts exp_to_layout,
zdnn_status exp_status) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_status status;
switch (from_layout) {
case ZDNN_2DS:
zdnn_init_pre_transformed_desc(from_layout, test_datatype, &pre_tfrmd_desc,
dim2, dim1);
break;
case ZDNN_3DS:
zdnn_init_pre_transformed_desc(from_layout, test_datatype, &pre_tfrmd_desc,
dim3, dim2, dim1);
break;
case ZDNN_4DS:
zdnn_init_pre_transformed_desc(from_layout, test_datatype, &pre_tfrmd_desc,
dim4, dim2, dim1);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("unknown from_layout %d", from_layout);
}
if (!is_concat) {
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
} else {
if (from_layout == ZDNN_2DS) {
status = zdnn_generate_transformed_desc_concatenated(
&pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_BIASES | PREV_LAYER_NONE,
&tfrmd_desc);
} else if (from_layout == ZDNN_3DS) {
status = zdnn_generate_transformed_desc_concatenated(
&pre_tfrmd_desc, RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_NONE,
&tfrmd_desc);
} else {
// error test: caller will attempt to do is_concat = true with something
// other than 2DS/3DS
status = zdnn_generate_transformed_desc_concatenated(
&pre_tfrmd_desc,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS | PREV_LAYER_UNI, &tfrmd_desc);
}
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == exp_status,
"zdnn_generate_transformed_desc(_concatenated)() returned "
"status %08x \"%s\" but expected %08x \"%s\"",
status, zdnn_get_status_message(status), exp_status,
zdnn_get_status_message(exp_status));
if (exp_status == ZDNN_OK) {
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.layout == exp_to_layout,
"transformed layout is not %s (%d), found %s (%d)",
get_data_layout_str(exp_to_layout), exp_to_layout,
get_data_layout_str(tfrmd_desc.layout), tfrmd_desc.layout);
}
}
void verify_2ds_transformed_layout_normal() {
verify_transformed_layout(ZDNN_2DS, false, 9999, 9999, 1, 1, ZDNN_NHWC,
ZDNN_OK);
}
void verify_2ds_transformed_layout_normal_fail() {
verify_transformed_layout(ZDNN_2DS, false, 9999, 9999, 1, 1, ZDNN_NHWC,
ZDNN_INVALID_TYPE);
}
void verify_2ds_transformed_layout_concat() {
verify_transformed_layout(ZDNN_2DS, true, 9999, 9999, 1, 1, ZDNN_FICO,
ZDNN_OK);
}
void verify_2ds_transformed_layout_concat_fail() {
verify_transformed_layout(ZDNN_2DS, true, 9999, 9999, 1, 1, ZDNN_FICO,
ZDNN_INVALID_TYPE);
}
void verify_3ds_transformed_layout_normal() {
verify_transformed_layout(ZDNN_3DS, false, 9999, 1, 1, 1, ZDNN_NHWC, ZDNN_OK);
}
void verify_3ds_transformed_layout_normal_fail() {
verify_transformed_layout(ZDNN_3DS, false, 9999, 1, 1, 1, ZDNN_NHWC,
ZDNN_INVALID_TYPE);
}
void verify_3ds_transformed_layout_concat() {
verify_transformed_layout(ZDNN_3DS, true, 9999, 1, 1, 1, ZDNN_FICO, ZDNN_OK);
}
void verify_3ds_transformed_layout_concat_fail() {
verify_transformed_layout(ZDNN_3DS, true, 9999, 1, 1, 1, ZDNN_FICO,
ZDNN_INVALID_TYPE);
}
void verify_4ds_transformed_layout_normal() {
verify_transformed_layout(ZDNN_4DS, false, 1, 1, 1, 1, ZDNN_NHWC, ZDNN_OK);
}
void verify_4ds_transformed_layout_normal_fail() {
verify_transformed_layout(ZDNN_4DS, false, 1, 1, 1, 1, ZDNN_NHWC,
ZDNN_INVALID_TYPE);
}
void verify_4ds_transformed_layout_concat_fail() {
  // exp_to_layout does not matter; this case is supposed to error out
verify_transformed_layout(ZDNN_4DS, true, 1, 1, 1, 1, ZDNN_NHWC,
ZDNN_INVALID_TYPE);
}
void verify_descriptors_transform_valid_format_4dfeature() {
zdnn_ztensor ztensor;
zdnn_tensor_desc ptd_desc, td_desc;
zdnn_status status;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &ptd_desc, 1, 1, 1, 1);
zdnn_generate_transformed_desc(&ptd_desc, &td_desc);
zdnn_init_ztensor(&ptd_desc, &td_desc, &ztensor);
ztensor.transformed_desc->format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_layouts acceptable_tfd_layouts[] = {
ZDNN_NHWC, ZDNN_FICO, ZDNN_ZRH, ZDNN_BIDIR_FICO, ZDNN_BIDIR_ZRH};
for (int i = 0;
i < (sizeof(acceptable_tfd_layouts) / sizeof(acceptable_tfd_layouts[0]));
i++) {
ztensor.transformed_desc->layout = acceptable_tfd_layouts[i];
ztensor.transformed_desc->type = ZDNN_DLFLOAT16;
    zdnn_data_layouts acceptable_ptfd_layouts[] = {
ZDNN_1D, ZDNN_2D, ZDNN_2DS, ZDNN_3D, ZDNN_3DS,
ZDNN_4D, ZDNN_4DS, ZDNN_NHWC, ZDNN_NCHW};
for (int j = 0; j < (sizeof(acceptable_ptfd_layouts) /
sizeof(acceptable_ptfd_layouts[0]));
j++) {
ztensor.pre_transformed_desc->layout = acceptable_ptfd_layouts[j];
zdnn_data_types acceptable_ptfd_types[] = {BFLOAT, FP16, FP32};
for (int k = 0; k < (sizeof(acceptable_ptfd_types) /
sizeof(acceptable_ptfd_types[0]));
k++) {
ztensor.pre_transformed_desc->type = acceptable_ptfd_types[k];
status = verify_descriptors_transform_ztensor(&ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"verify_descriptors_transform_ztensor returned "
"status %08x \"%s\" but expected %08x \"%s\" when running with "
"tfd_layout %s, ptfd_layout %s, ptf_type %s",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK),
get_data_layout_str(acceptable_tfd_layouts[i]),
get_data_layout_str(acceptable_ptfd_layouts[j]),
get_data_type_str(acceptable_ptfd_types[k]));
}
}
}
}
// ------------------------------------------------------------------------------------------------
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(verify_dims);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(verify_max_tensor_size);
// test all data-types possible
  RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_datatype_transformed);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(verify_generated_format);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(format_undefined_fail);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(format_feature_layout_notagree_fail);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(format_kernel_layout_notagree_fail);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(format_feature_layout_undefined_fail);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(format_kernel_layout_undefined_fail);
// TODO write tests that drive now failing paths.
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_2ds_transformed_layout_normal);
RUN_TEST_ALL_INDEX_PRE_DATATYPES(verify_2ds_transformed_layout_normal);
RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(
verify_2ds_transformed_layout_normal_fail);
RUN_TEST_ALL_TFRMD_DATATYPES(verify_2ds_transformed_layout_normal_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_2ds_transformed_layout_concat);
RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(
verify_2ds_transformed_layout_concat_fail);
RUN_TEST_ALL_INDEX_PRE_DATATYPES(verify_2ds_transformed_layout_concat_fail);
RUN_TEST_ALL_TFRMD_DATATYPES(verify_2ds_transformed_layout_concat_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_3ds_transformed_layout_normal);
RUN_TEST_ALL_INDEX_PRE_DATATYPES(verify_3ds_transformed_layout_normal);
RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(
verify_3ds_transformed_layout_normal_fail);
RUN_TEST_ALL_TFRMD_DATATYPES(verify_3ds_transformed_layout_normal_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_3ds_transformed_layout_concat);
RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(
verify_3ds_transformed_layout_concat_fail);
RUN_TEST_ALL_INDEX_PRE_DATATYPES(verify_3ds_transformed_layout_concat_fail);
RUN_TEST_ALL_TFRMD_DATATYPES(verify_3ds_transformed_layout_concat_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(verify_4ds_transformed_layout_normal);
RUN_TEST_ALL_INDEX_PRE_DATATYPES(verify_4ds_transformed_layout_normal);
RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(
verify_4ds_transformed_layout_normal_fail);
RUN_TEST_ALL_TFRMD_DATATYPES(verify_4ds_transformed_layout_normal_fail);
RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(
verify_4ds_transformed_layout_concat_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_slicing_specified_buffer);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_slicing_fail_input_has_only_one_dim4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_slicing_fail_too_many_slices);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_slicing_1D_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_slicing_2DS_5x2049);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_slicing_3DS_5x33x65);
RUN_TEST(verify_descriptors_transform_valid_format_4dfeature);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify.c 0000664 0000000 0000000 00000245761 15000221702 0020747 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {}
void tearDown(void) {}
/*
* Test ztensor format when created and updated.
*/
void verify_ztensor_format() {
VERIFY_HW_ENV; // verify required HW env is available.
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
void *data;
uint32_t dim4 = 1, dim3 = 4, dim2 = 4, dim1 = 1;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed (status = %08x)", status);
// verify proper state of is_transformed field after ztensor created
TEST_ASSERT_MESSAGE(
false == ztensor.is_transformed,
"Expected ztensor to indicate transform not completed yet.");
data = create_and_fill_random_fp_data(&ztensor);
// transform the app tensor's data into stickified data
LOG_DEBUG("about to transform ztensor", NO_ARG);
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE(ZDNN_OK == status,
"zdnn_transform_ztensor did not return OK as expected");
// verify proper state of is_transformed field after ztensor has stickified
// data
TEST_ASSERT_MESSAGE(true == ztensor.is_transformed,
"Expected ztensor to indicate transform was completed.");
// Free allocated storage
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
/// Common test routine for normal tensors
///
/// \param[in] num_inputs Number of input tensors
/// \param[in] input_shape_lst Pointer to array of pointers to input dim
/// arrays
/// \param[in] input_format_lst Pointer to array of input formats
/// \param[in] input_type_lst Pointer to array of input types
/// \param[in] num_outputs Number of output tensors
/// \param[in] output_shape_lst Pointer to array of pointers to output dim
/// arrays
/// \param[in] output_format_lst Pointer to array of output formats
/// \param[in] output_type_lst Pointer to array of output types
/// \param[in] exp_status Expected status
/// \param[in] error_msg Error message to prepend to the standard error
/// message
///
void test_normal(uint8_t num_inputs, uint32_t **input_shape_lst,
zdnn_data_formats *input_format_lst,
zdnn_data_types *input_type_lst, uint8_t num_outputs,
uint32_t **output_shape_lst,
zdnn_data_formats *output_format_lst,
zdnn_data_types *output_type_lst, zdnn_status exp_status,
char *error_msg) {
zdnn_ztensor input_ztensor[num_inputs];
zdnn_ztensor output_ztensor[num_outputs];
zdnn_status status = ZDNN_OK;
// allocate a transformed descriptor with input_shape_lst[i],
// input_format_lst[i] and input_type_lst[i]
for (int i = 0; i < num_inputs; i++) {
uint32_t *shape = input_shape_lst[i];
input_ztensor[i].transformed_desc = malloc(sizeof(zdnn_tensor_desc));
init_transformed_desc(
input_format_lst[i] == ZDNN_FORMAT_4DFEATURE ? ZDNN_NHWC : ZDNN_HWCK,
input_type_lst[i], input_format_lst[i],
input_ztensor[i].transformed_desc, shape[0], shape[1], shape[2],
shape[3]);
}
// same idea with the outputs
for (int i = 0; i < num_outputs; i++) {
uint32_t *shape = output_shape_lst[i];
output_ztensor[i].transformed_desc = malloc(sizeof(zdnn_tensor_desc));
init_transformed_desc(
output_format_lst[i] == ZDNN_FORMAT_4DFEATURE ? ZDNN_NHWC : ZDNN_HWCK,
output_type_lst[i], output_format_lst[i],
output_ztensor[i].transformed_desc, shape[0], shape[1], shape[2],
shape[3]);
}
// number of inputs to send to verify_tensors() depends on num_inputs
status = verify_tensors(
&input_ztensor[0], (num_inputs > 1) ? &input_ztensor[1] : NULL,
(num_inputs > 2) ? &input_ztensor[2] : NULL, &output_ztensor[0]);
TEST_ASSERT_MESSAGE_FORMATTED(
exp_status == status, "%s Expected status = %08x, actual status = %08x",
error_msg, exp_status, status);
for (int i = 0; i < num_inputs; i++) {
free(input_ztensor[i].transformed_desc);
}
for (int i = 0; i < num_outputs; i++) {
free(output_ztensor[i].transformed_desc);
}
}
/*
* Test verification of valid output tensor along with an input tensor.
* All tensors will be built with same properties.
*/
void verify_1input_pass() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
  test_normal(1, input_shape_lst, input_format_lst, input_type_lst, 1,
              output_shape_lst, output_format_lst, output_type_lst, ZDNN_OK,
              "The output and the input tensor are different.");
}
/*
* Test verification of valid output tensor along with 2 input tensors.
* All tensors will be built with same properties.
*/
void verify_2input_pass() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE,
ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(2, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst, ZDNN_OK,
"The output and the input tensors are different.");
}
/*
* Test verification of valid output tensor along with 3 input tensors.
* All tensors will be built with same properties.
*/
void verify_3input_pass() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(3, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst, ZDNN_OK,
"The output and the input tensors are different.");
}
/*
* Test verification of different shapes between 2 input tensors.
* Input tensors will have different shapes.
* Output tensor will have same properties as Input tensor 1.
*/
void verify_input2_fail_shape() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t different_shape[ZDNN_MAX_DIMS] = {1, 2, 3, 4};
uint32_t *input_shape_lst[] = {io_shape, different_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE,
ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(2, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_SHAPE,
"Failed to fail on different input tensor shapes.");
}
/*
* Test verification of different shapes between 3 input tensors.
 * Input tensor 3 will have a different shape.
* Output tensor will have same properties as Input tensor 1 and 2.
*/
void verify_input3_fail_shape() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t different_shape[ZDNN_MAX_DIMS] = {1, 2, 3, 4};
uint32_t *input_shape_lst[] = {io_shape, io_shape, different_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(3, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_SHAPE,
"Failed to fail on different input tensor shapes.");
}
/*
* Test verification of different data formats between 2 input tensors.
* Input tensors will have different data formats.
* Output tensor will have same properties as Input tensor 1.
*/
void verify_input2_fail_format() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE,
ZDNN_FORMAT_4DKERNEL};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(2, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_FORMAT,
"Failed to fail on different input tensor data formats.");
}
/*
* Test verification of different data formats between 3 input tensors.
 * Input tensor 3 will have a different data format.
* Output tensor will have same properties as Input tensor 1 and 2.
*/
void verify_input3_fail_format() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DKERNEL};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(3, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_FORMAT,
"Failed to fail on different input tensor data formats.");
}
/*
* Test verification of different data types between 2 input tensors.
* Input tensors will have different data types.
* Output tensor will have same properties as Input tensor 1.
*/
void verify_input2_fail_dtype() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, FP32};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE,
ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(2, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_TYPE,
"Failed to fail on different input tensor data types.");
}
/*
* Test verification of different data types between 3 input tensors.
 * Input tensor 3 will have a different data type.
* Output tensor will have same properties as Input tensor 1 and 2.
*/
void verify_input3_fail_dtype() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, FP32};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(3, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_TYPE,
"Failed to fail on different input tensor data types.");
}
/*
* Test verification of different shapes between output and input tensor.
* Input and Output tensor will have a different shape.
*/
void verify_output_fail_shape() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t different_shape[ZDNN_MAX_DIMS] = {1, 2, 3, 4};
uint32_t *input_shape_lst[] = {io_shape};
uint32_t *output_shape_lst[] = {different_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(1, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_SHAPE,
"Failed to fail on different output/input tensor shapes.");
}
/*
* Test verification of different data format between output and input
 * tensors. Both input tensors will have the same properties. Output tensor will
* have a different data format.
*/
void verify_output_fail_format() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {ZDNN_DLFLOAT16};
zdnn_data_formats input_format_lst[] = {ZDNN_FORMAT_4DFEATURE,
ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DKERNEL};
test_normal(2, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_FORMAT,
"Failed to fail on different output/input tensor data formats.");
}
/*
* Test verification of different data types between output and input tensors.
* All three input tensors will have the same properties.
* Output tensor will have a different data type.
*/
void verify_output_fail_dtype() {
uint32_t io_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 3};
uint32_t *input_shape_lst[] = {io_shape, io_shape, io_shape};
uint32_t *output_shape_lst[] = {io_shape};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
zdnn_data_types output_type_lst[] = {FP32};
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_formats output_format_lst[] = {ZDNN_FORMAT_4DFEATURE};
test_normal(3, input_shape_lst, input_format_lst, input_type_lst, 1,
output_shape_lst, output_format_lst, output_type_lst,
ZDNN_INVALID_TYPE,
"Failed to fail on different output/input tensor data types.");
}
#define MATMUL_NUM_INPUTS 3
/// Common test routine for matmul op + matmul bcast op tensors
///
/// \param[in] function_code
///                       NNPA_MATMUL_OP, NNPA_MATMUL_OP_BCAST23 or
///                       NNPA_MATMUL_OP_BCAST1
/// \param[in] input_shape_lst
/// 2D array, MATMUL_NUM_INPUTS x ZDNN_MAX_DIMS number of
/// dimensions
/// \param[in] input_shape_displace_lst
/// MATMUL_NUM_INPUTS x ZDNN_MAX_DIMS number of
/// displacement for each of the entries in input_shape_lst
/// (e.g., +1, +5, -3, etc)
/// \param[in] input_format_lst
/// array, MATMUL_NUM_INPUTS number of entries of formats
/// \param[in] input_type_lst
/// array, MATMUL_NUM_INPUTS number of entries of types
/// \param[in] output_shape
/// 1D array, ZDNN_MAX_DIMS number of dimensions
/// \param[in] output_shape_displace
/// ZDNN_MAX_DIMS number of displacement for each of the
/// entries in output_shape
/// \param[in] output_format output format
/// \param[in] output_type output type
/// \param[in] exp_status Expected status
///
void test_matmul(
uint8_t function_code,
uint32_t input_shape_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS],
int32_t input_shape_displace_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS],
zdnn_data_formats *input_format_lst, zdnn_data_types *input_type_lst,
const uint32_t *output_shape, const int32_t *output_shape_displace,
zdnn_data_formats output_format, zdnn_data_types output_type,
zdnn_status exp_status) {
zdnn_ztensor input_ztensor[MATMUL_NUM_INPUTS];
zdnn_ztensor output_ztensor;
zdnn_status status = ZDNN_OK;
/*
create MATMUL_NUM_INPUTS numbers of transformed descriptors, using:
input_shape_lst[i] + input_shape_displace_lst[i] as shape
e.g., input_shape_lst[i] = {1, 2, 3, 4}
input_shape_displace_lst[i] = {0, 1, -1, 5}
          resulting shape passed to init_transformed_desc() = { 1 + 0 = 1,
2 + 1 = 3,
3 + -1 = 2,
4 + 5 = 9 }
input_format_lst[i] as format
input_type_lst[i] as type
*/
for (int i = 0; i < MATMUL_NUM_INPUTS; i++) {
input_ztensor[i].transformed_desc = malloc(sizeof(zdnn_tensor_desc));
LOG_DEBUG("input %d -> format %d, type %d\n", i, input_format_lst[i],
input_type_lst[i]);
LOG_DEBUG(" dim4 %d, displace %d\n", input_shape_lst[i][0],
input_shape_displace_lst[i][0]);
LOG_DEBUG(" dim3 %d, displace %d\n", input_shape_lst[i][1],
input_shape_displace_lst[i][1]);
LOG_DEBUG(" dim2 %d, displace %d\n", input_shape_lst[i][2],
input_shape_displace_lst[i][2]);
LOG_DEBUG(" dim1 %d, displace %d\n", input_shape_lst[i][3],
input_shape_displace_lst[i][3]);
init_transformed_desc(
input_format_lst[i] == ZDNN_FORMAT_4DFEATURE ? ZDNN_NHWC : ZDNN_HWCK,
input_type_lst[i], input_format_lst[i],
input_ztensor[i].transformed_desc,
input_shape_lst[i][0] + input_shape_displace_lst[i][0],
input_shape_lst[i][1] + input_shape_displace_lst[i][1],
input_shape_lst[i][2] + input_shape_displace_lst[i][2],
input_shape_lst[i][3] + input_shape_displace_lst[i][3]);
}
LOG_DEBUG("output -> format %d, type %d\n", output_format, output_type);
LOG_DEBUG(" dim4 %d, displace %d\n", output_shape[0],
output_shape_displace[0]);
LOG_DEBUG(" dim3 %d, displace %d\n", output_shape[1],
output_shape_displace[1]);
LOG_DEBUG(" dim2 %d, displace %d\n", output_shape[2],
output_shape_displace[2]);
LOG_DEBUG(" dim1 %d, displace %d\n", output_shape[3],
output_shape_displace[3]);
output_ztensor.transformed_desc = malloc(sizeof(zdnn_tensor_desc));
init_transformed_desc(
output_format == ZDNN_FORMAT_4DFEATURE ? ZDNN_NHWC : ZDNN_HWCK,
output_type, output_format, output_ztensor.transformed_desc,
output_shape[0] + output_shape_displace[0],
output_shape[1] + output_shape_displace[1],
output_shape[2] + output_shape_displace[2],
output_shape[3] + output_shape_displace[3]);
func_sp_parm2_matmul matmul_parm2;
memset(&matmul_parm2, 0, sizeof(func_sp_parm2_matmul));
func_sp_parm3_matmul matmul_parm3;
memset(&matmul_parm3, 0, sizeof(func_sp_parm3_matmul));
matmul_parm3.rec_scale = 1;
func_sp_parm4_matmul matmul_parm4;
memset(&matmul_parm4, 0, sizeof(func_sp_parm4_matmul));
func_sp_parm9_matmul matmul_parm9;
memset(&matmul_parm9, 0, sizeof(func_sp_parm9_matmul));
func_sp_parm10_matmul matmul_parm10;
memset(&matmul_parm10, 0, sizeof(func_sp_parm10_matmul));
switch (function_code) {
case NNPA_MATMUL_OP:
case NNPA_MATMUL_OP_BCAST23:
case NNPA_MATMUL_OP_BCAST1:
status = verify_matmul_op_common(
function_code, &input_ztensor[0], &input_ztensor[1], &input_ztensor[2],
&matmul_parm2, &matmul_parm3, &matmul_parm4, &matmul_parm9,
&matmul_parm10, &output_ztensor);
break;
default:
TEST_FAIL_MESSAGE("unknown mode");
break;
}
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
for (int i = 0; i < MATMUL_NUM_INPUTS; i++) {
free(input_ztensor[i].transformed_desc);
}
free(output_ztensor.transformed_desc);
}
void test_matmul_third(
int32_t input_shape_displace_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS],
zdnn_data_formats *input_format_lst, zdnn_data_types *input_type_lst,
int32_t *output_shape_displace, zdnn_data_formats output_format,
zdnn_data_types output_type, zdnn_status exp_status) {
uint32_t matmul_op_first_shape[ZDNN_MAX_DIMS] = {4, 1, 16, 8};
uint32_t matmul_op_second_shape[ZDNN_MAX_DIMS] = {4, 1, 8, 4};
uint32_t matmul_op_third_shape[ZDNN_MAX_DIMS] = {4, 1, 1, 4};
// concatenate the 1D arrays into 2D input for test_matmul()
uint32_t input_shape_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS];
memcpy(input_shape_lst[0], matmul_op_first_shape,
sizeof(uint32_t) * ZDNN_MAX_DIMS);
memcpy(input_shape_lst[1], matmul_op_second_shape,
sizeof(uint32_t) * ZDNN_MAX_DIMS);
memcpy(input_shape_lst[2], matmul_op_third_shape,
sizeof(uint32_t) * ZDNN_MAX_DIMS);
uint32_t matmul_op_result_shape[ZDNN_MAX_DIMS] = {4, 1, 16, 4};
test_matmul(NNPA_MATMUL_OP, input_shape_lst, input_shape_displace_lst,
input_format_lst, input_type_lst, matmul_op_result_shape,
output_shape_displace, output_format, output_type, exp_status);
}
void test_matmul_bcast_op(
int32_t input_shape_displace_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS],
zdnn_data_formats *input_format_lst, zdnn_data_types *input_type_lst,
const int32_t *output_shape_displace, zdnn_data_formats output_format,
zdnn_data_types output_type, zdnn_status exp_status) {
uint32_t feature = 32, batch = 4, spad_x4 = 256, timestep = 4;
uint32_t input_shape[ZDNN_MAX_DIMS] = {timestep, 1, batch, feature};
uint32_t weights_shape[ZDNN_MAX_DIMS] = {1, 1, feature, spad_x4};
uint32_t bias_shape[ZDNN_MAX_DIMS] = {1, 1, 1, spad_x4};
// concatenate the 1D arrays into 2D input for test_matmul()
uint32_t input_shape_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS];
memcpy(input_shape_lst[0], input_shape, sizeof(uint32_t) * ZDNN_MAX_DIMS);
memcpy(input_shape_lst[1], weights_shape, sizeof(uint32_t) * ZDNN_MAX_DIMS);
memcpy(input_shape_lst[2], bias_shape, sizeof(uint32_t) * ZDNN_MAX_DIMS);
uint32_t fused_shape[ZDNN_MAX_DIMS] = {timestep, 1, batch, spad_x4};
test_matmul(NNPA_MATMUL_OP_BCAST23, input_shape_lst, input_shape_displace_lst,
input_format_lst, input_type_lst, fused_shape,
output_shape_displace, output_format, output_type, exp_status);
}
/*
* Test verification of valid matmul third tensors.
* All tensors will be built with acceptable properties.
*/
void verify_matmul_op_pass() {
int32_t input_shape_displace_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
test_matmul_third(input_shape_displace_lst, input_format_lst, input_type_lst,
output_shape_displace, output_format, output_type, ZDNN_OK);
}
/*
* Test verification of failed matmul op output shape.
* All input tensors will have acceptable descriptors.
 * Output will have an invalid value in the i-th dimension.
*/
void verify_matmul_op_fail_output_shape() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int i = 0; i < ZDNN_MAX_DIMS; i++) {
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
output_shape_displace[i] = 1;
test_matmul_third(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_SHAPE);
}
}
/*
* Test verification of failed matmul op third input shape.
* Output will have valid descriptor.
* Input j will have a bad i-th dimension.
*/
void verify_matmul_op_fail_input_shape() {
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int j = 0; j < MATMUL_NUM_INPUTS; j++) {
for (int i = 0; i < ZDNN_MAX_DIMS; i++) {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
input_shape_displace_lst[j][i] = 1;
test_matmul_third(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_SHAPE);
}
}
}
/*
* Test verification of failed matmul op output format.
* All input tensors will have acceptable descriptors.
* Output will have mismatched format.
*/
void verify_matmul_op_fail_output_format() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DKERNEL;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
test_matmul_third(input_shape_displace_lst, input_format_lst, input_type_lst,
output_shape_displace, output_format, output_type,
ZDNN_INVALID_FORMAT);
}
/*
* Test verification of failed matmul op third input format.
* Output will have valid descriptor.
* Input i will have a different format.
*/
void verify_matmul_op_fail_input_format() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int i = 0; i < MATMUL_NUM_INPUTS; i++) {
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
input_format_lst[i] = ZDNN_FORMAT_4DKERNEL;
test_matmul_third(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_FORMAT);
}
}
/*
* Test verification of failed matmul op output type.
* All input tensors will have acceptable descriptors.
* Output will have mismatched type.
*/
void verify_matmul_op_fail_output_type() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = FP32;
test_matmul_third(input_shape_displace_lst, input_format_lst, input_type_lst,
output_shape_displace, output_format, output_type,
ZDNN_INVALID_TYPE);
}
/*
* Test verification of failed matmul third input type.
* Output will have valid descriptor.
* Input i will have a different type.
*/
void verify_matmul_op_fail_input_type() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int i = 0; i < MATMUL_NUM_INPUTS; i++) {
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
input_type_lst[i] = FP32;
test_matmul_third(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_TYPE);
}
}
/*
* Test verification of valid matmul bcast op tensors.
* All tensors will be built with acceptable properties.
*/
void verify_matmul_bcast_op_pass() {
int32_t input_shape_displace_lst[MATMUL_NUM_INPUTS][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_OK);
}
/*
* Test verification of failed matmul bcast op output shape.
* All input tensors will have acceptable descriptors.
 * Output will have an invalid value in the i-th dimension.
*/
void verify_matmul_bcast_op_fail_output_shape() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int i = 0; i < ZDNN_MAX_DIMS; i++) {
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
output_shape_displace[i] = 1;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_SHAPE);
}
}
/*
* Test verification of failed matmul bcast op input shape.
* Output will have valid descriptor.
* Input j will have a bad i-th dimension.
*/
void verify_matmul_bcast_op_fail_input_shape() {
zdnn_data_formats input_format_lst[] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[] = {ZDNN_DLFLOAT16, ZDNN_DLFLOAT16,
ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int j = 0; j < MATMUL_NUM_INPUTS; j++) {
for (int i = 0; i < ZDNN_MAX_DIMS; i++) {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
input_shape_displace_lst[j][i] = 1;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_SHAPE);
}
}
}
/*
* Test verification of failed matmul bcast op input format.
* All input/output tensors will have acceptable descriptors, except
* input2 will have mismatched format.
*/
void verify_matmul_bcast_op_fail_input_format() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DKERNEL, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_FORMAT);
}
/*
* Test verification of failed matmul bcast op output format.
* All input tensors will have acceptable descriptors.
* Output will have mismatched format.
*/
void verify_matmul_bcast_op_fail_output_format() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DKERNEL;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_FORMAT);
}
/*
* Test verification of failed matmul bcast op output type.
* All input tensors will have acceptable descriptors.
* Output will have mismatched type.
*/
void verify_matmul_bcast_op_fail_output_type() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = FP32;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_TYPE);
}
/*
* Test verification of failed matmul bcast op input type.
* Output will have valid descriptor.
* Input i will have a different type.
*/
void verify_matmul_bcast_op_fail_input_type() {
int32_t input_shape_displace_lst[][ZDNN_MAX_DIMS] = {
{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
zdnn_data_formats input_format_lst[MATMUL_NUM_INPUTS] = {
ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE, ZDNN_FORMAT_4DFEATURE};
int32_t output_shape_displace[ZDNN_MAX_DIMS] = {0, 0, 0, 0};
zdnn_data_formats output_format = ZDNN_FORMAT_4DFEATURE;
zdnn_data_types output_type = ZDNN_DLFLOAT16;
for (int i = 0; i < MATMUL_NUM_INPUTS; i++) {
zdnn_data_types input_type_lst[MATMUL_NUM_INPUTS] = {
ZDNN_DLFLOAT16, ZDNN_DLFLOAT16, ZDNN_DLFLOAT16};
input_type_lst[i] = FP32;
test_matmul_bcast_op(input_shape_displace_lst, input_format_lst,
input_type_lst, output_shape_displace, output_format,
output_type, ZDNN_INVALID_TYPE);
}
}
/// Common test routine for batchnorm tensors
///
/// \param[in] sbtg_input_b_dim_idx
/// which dimension (4, 3, 2 or 1) of scale tensor shape to
/// sabotage, set to 0 if nothing to sabotage
/// \param[in] sbtg_input_b_val scale tensor sabotage value
/// \param[in] sbtg_input_c_dim_idx
/// which dimension (4, 3, 2 or 1) of bias tensor shape to
/// sabotage, set to 0 if nothing to sabotage
/// \param[in] sbtg_input_c_val bias tensor sabotage value
/// \param[in] exp_status Expected status
///
void test_batchnorm(uint8_t sbtg_input_b_dim_idx, uint32_t sbtg_input_b_val,
int8_t sbtg_input_c_dim_idx, uint32_t sbtg_input_c_val,
zdnn_status exp_status) {
zdnn_tensor_desc tfrmd_desc_input_a, tfrmd_desc_input_b, tfrmd_desc_input_c,
tfrmd_desc_output;
zdnn_ztensor input_a, input_b, input_c, output;
input_a.transformed_desc = &tfrmd_desc_input_a;
input_b.transformed_desc = &tfrmd_desc_input_b;
input_c.transformed_desc = &tfrmd_desc_input_c;
output.transformed_desc = &tfrmd_desc_output;
uint32_t input_a_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t input_b_shape[ZDNN_MAX_DIMS] = {1, 1, 1, 4};
uint32_t input_c_shape[ZDNN_MAX_DIMS] = {1, 1, 1, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
zdnn_status status;
// e.g., sabotage dim_idx = 4 -> modify shape[0]
// sabotage dim_idx = 1 -> modify shape[3]
if (sbtg_input_b_dim_idx != 0) {
input_b_shape[ZDNN_MAX_DIMS - sbtg_input_b_dim_idx] = sbtg_input_b_val;
}
if (sbtg_input_c_dim_idx != 0) {
input_c_shape[ZDNN_MAX_DIMS - sbtg_input_c_dim_idx] = sbtg_input_c_val;
}
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc_input_a, input_a_shape[0], input_a_shape[1],
input_a_shape[2], input_a_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc_input_b, input_b_shape[0], input_b_shape[1],
input_b_shape[2], input_b_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc_input_c, input_c_shape[0], input_c_shape[1],
input_c_shape[2], input_c_shape[3]);
// The output is a 4D tensor of same shape, format, and data type as the
// input
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc_output, output_shape[0], output_shape[1],
output_shape[2], output_shape[3]);
status = verify_batchnorm_tensors(&input_a, &input_b, &input_c, &output);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
}
/*
* Simple test of verifying default inputs and output.
*/
void batchnorm_verify_pass() { test_batchnorm(0, 0, 0, 0, ZDNN_OK); }
/*
 * Test that expects an error because dimension-2 of the scale tensor is not 1
*/
void batchnorm_verify_input_b_bad_dim2_fail() {
test_batchnorm(2, 2, 0, 0, ZDNN_INVALID_SHAPE);
}
/*
 * Test that expects an error because dimension-1 of the scale tensor does not
 * match the other tensors
*/
void batchnorm_verify_input_b_bad_dim1_fail() {
test_batchnorm(1, 3, 0, 0, ZDNN_INVALID_SHAPE);
}
/*
 * Test that expects an error because dimension-2 of the bias tensor is not 1
*/
void batchnorm_verify_input_c_bad_dim2_fail() {
test_batchnorm(0, 0, 2, 2, ZDNN_INVALID_SHAPE);
}
/*
 * Test that expects an error because dimension-1 of the bias tensor does not
 * match the other tensors
*/
void batchnorm_verify_input_c_bad_dim1_fail() {
test_batchnorm(0, 0, 1, 3, ZDNN_INVALID_SHAPE);
}
/// Common test routine for relu tensors
///
/// \param[in] input_shape Pointer to input dim array
/// \param[in] input_format Input format
/// \param[in] input_type Input type
/// \param[in] output_shape Pointer to output dim array
/// \param[in] output_format Output format
/// \param[in] output_type Output type
/// \param[in] exp_status Expected status
/// \param[in] error_msg Error message to prepend to the standard error
/// message
///
void test_relu(uint32_t input_shape[], zdnn_data_formats input_format,
zdnn_data_types input_type, uint32_t output_shape[],
zdnn_data_formats output_format, zdnn_data_types output_type,
zdnn_status exp_status, char *error_msg) {
zdnn_status status = ZDNN_OK;
zdnn_ztensor input, output;
zdnn_tensor_desc tfrmd_desc_input, tfrmd_desc_output;
input.transformed_desc = &tfrmd_desc_input;
output.transformed_desc = &tfrmd_desc_output;
init_transformed_desc(ZDNN_NHWC, input_type, input_format,
input.transformed_desc, input_shape[0], input_shape[1],
input_shape[2], input_shape[3]);
uint32_t clipping_value = 0;
uint32_t adjustment_factor = 0;
init_transformed_desc(ZDNN_NHWC, output_type, output_format,
output.transformed_desc, output_shape[0],
output_shape[1], output_shape[2], output_shape[3]);
status =
verify_relu_tensors(&input, &clipping_value, &adjustment_factor, &output);
TEST_ASSERT_MESSAGE_FORMATTED(
exp_status == status, "%s Expected status = %08x, actual status = %08x",
error_msg, exp_status, status);
}
void relu_verify_pass() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
  test_relu(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
            ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_OK,
            "The output and the input tensor are different.");
}
void relu_verify_fail_shape() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 3};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_relu(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void relu_verify_fail_format() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_relu(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
ZDNN_FORMAT_4DKERNEL, ZDNN_DLFLOAT16, ZDNN_INVALID_FORMAT,
"Failed to fail on different formats.");
}
void relu_verify_fail_dtype() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_relu(input_shape, ZDNN_FORMAT_4DFEATURE, FP32, output_shape,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_INVALID_TYPE,
"Failed to fail on different types.");
}
/// Common test routine for norm tensors
///
/// \param[in] input_a_shape input a tensor shape
/// \param[in] input_b_shape input b tensor shape
/// \param[in] output_shape output tensor shape
/// \param[in] exp_status Expected status
/// \param[in] ztensor_to_error index of the descriptor to corrupt
///                             (0 = input a, 1 = input b, 2 = output)
///
void test_norm(uint32_t input_a_shape[], uint32_t input_b_shape[],
uint32_t output_shape[], zdnn_status exp_status,
int ztensor_to_error) {
zdnn_tensor_desc tfrmd_desc[3];
zdnn_ztensor input_a, input_b, output;
input_a.transformed_desc = &tfrmd_desc[0];
input_b.transformed_desc = &tfrmd_desc[1];
output.transformed_desc = &tfrmd_desc[2];
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[0], input_a_shape[0], input_a_shape[1],
input_a_shape[2], input_a_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[1], input_b_shape[0], input_b_shape[1],
input_b_shape[2], input_b_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[2], output_shape[0], output_shape[1],
output_shape[2], output_shape[3]);
if (exp_status == ZDNN_INVALID_TYPE) {
// cppcheck-suppress unreadVariable
tfrmd_desc[ztensor_to_error].type = FP32;
}
if (exp_status == ZDNN_INVALID_FORMAT) {
// cppcheck-suppress unreadVariable
tfrmd_desc[ztensor_to_error].format = ZDNN_FORMAT_4DKERNEL;
}
zdnn_status status = verify_norm_tensors(&input_a, &input_b, &output);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
}
void norm_verify_pass() {
// Trivial correct input and output shape test to pass.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_OK, 0);
}
// Check that the dim-4 index size of all specified tensors is the same.
void norm_verify_input_bad_dim4_fail() {
// Fail since input and output dim4 are not equal.
uint32_t shape_i[] = {10, 1, 1, 10};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_SHAPE, 0);
}
// Check that the dim-3 index size of all specified tensors is 1.
void norm_verify_input_bad_dim3_fail() {
// Fail since input and output dim3 are not 1
uint32_t shape_i[] = {1, 1, 5, 18};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_SHAPE, 0);
}
// Check that the dim-2 index size of all specified tensors is the same.
void norm_verify_input_bad_dim2_fail() {
// Fail since input and output dim2 are not equal.
uint32_t shape_i[] = {1, 2, 2, 10};
uint32_t shape_o[] = {1, 4, 2, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_SHAPE, 0);
}
// Check that the dim-1 index size of all specified input tensors is the
// same.
void norm_verify_input_bad_dim1_fail() {
  // Fail since dim1 of a & b are not equal.
uint32_t shape_i_a[] = {1, 2, 70, 180};
uint32_t shape_i_b[] = {1, 2, 70, 200};
uint32_t shape_o[] = {1, 2, 70, 1};
test_norm(shape_i_a, shape_i_b, shape_o, ZDNN_INVALID_SHAPE, 0);
}
// Check that the dim-1 index size of the output tensor is 1.
void norm_verify_output_bad_dim1_fail() {
uint32_t shape_i[] = {1, 2, 70, 180};
// Fail since output dim1=180, not 1
uint32_t shape_o[] = {1, 2, 70, 180};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_SHAPE, 0);
}
void norm_verify_bad_inputa_type_fail() {
// Shapes are valid; test_norm() sabotages the input a type to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_TYPE, 0);
}
void norm_verify_bad_inputb_type_fail() {
// Shapes are valid; test_norm() sabotages the input b type to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_TYPE, 1);
}
void norm_verify_bad_output_type_fail() {
// Shapes are valid; test_norm() sabotages the output type to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_TYPE, 2);
}
void norm_verify_bad_inputa_format_fail() {
// Shapes are valid; test_norm() sabotages the input a format to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_FORMAT, 0);
}
void norm_verify_bad_inputb_format_fail() {
// Shapes are valid; test_norm() sabotages the input b format to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_FORMAT, 1);
}
void norm_verify_bad_output_format_fail() {
// Shapes are valid; test_norm() sabotages the output format to force failure.
uint32_t shape_i[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
test_norm(shape_i, shape_i, shape_o, ZDNN_INVALID_FORMAT, 2);
}
/// Common test routine for moments tensors
///
/// \param[in] input_a_shape input a tensor shape
/// \param[in] bessel_correction bessel correction type
/// \param[in] output_a_shape output a tensor shape
/// \param[in] output_b_shape output b tensor shape
/// \param[in] type_in input a data type
/// \param[in] format_in input a format
/// \param[in] type_out_a output a data type
/// \param[in] format_out_a output a format
/// \param[in] type_out_b output b data type
/// \param[in] format_out_b output b format
/// \param[in] exp_status Expected status
///
void test_moments(uint32_t input_a_shape[], uint32_t bessel_correction,
uint32_t output_a_shape[], uint32_t output_b_shape[],
zdnn_data_types type_in, zdnn_data_formats format_in,
zdnn_data_types type_out_a, zdnn_data_formats format_out_a,
zdnn_data_types type_out_b, zdnn_data_formats format_out_b,
zdnn_status exp_status) {
zdnn_tensor_desc tfrmd_desc_input_a, tfrmd_desc_output_a, tfrmd_desc_output_b;
zdnn_ztensor input_a, output_a, output_b;
input_a.transformed_desc = &tfrmd_desc_input_a;
output_a.transformed_desc = &tfrmd_desc_output_a;
output_b.transformed_desc = &tfrmd_desc_output_b;
func_sp_parm1_moments moments_parm1;
memset(&moments_parm1, 0, sizeof(func_sp_parm1_moments));
moments_parm1.bessel_correction = bessel_correction;
init_transformed_desc(ZDNN_NHWC, type_in, format_in, &tfrmd_desc_input_a,
input_a_shape[0], input_a_shape[1], input_a_shape[2],
input_a_shape[3]);
init_transformed_desc(ZDNN_NHWC, type_out_a, format_out_a,
&tfrmd_desc_output_a, output_a_shape[0],
output_a_shape[1], output_a_shape[2],
output_a_shape[3]);
init_transformed_desc(ZDNN_NHWC, type_out_b, format_out_b,
&tfrmd_desc_output_b, output_b_shape[0],
output_b_shape[1], output_b_shape[2],
output_b_shape[3]);
zdnn_status status =
verify_moments_tensors(&input_a, &moments_parm1, &output_a, &output_b);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
}
void moments_verify_pass() {
// Trivial correct input and output shape test to pass.
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_OK);
}
void moments_bad_bessel_correction() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 1;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_INVALID_BESSEL_CORRECTION);
}
void moments_bad_out_a_dim4_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {2, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_a_dim3_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 2, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_a_dim2_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 2, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_a_dim1_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 2};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_b_dim4_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {2, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_b_dim3_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 2, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_b_dim2_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 2, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_out_b_dim1_fail() {
uint32_t input_a[] = {1, 1, 1, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 2};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_SHAPE);
}
void moments_bad_format_in_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DKERNEL, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_FORMAT);
}
void moments_bad_format_out_a_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DKERNEL,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_FORMAT);
}
void moments_bad_format_out_b_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DKERNEL, ZDNN_INVALID_FORMAT);
}
void moments_bad_type_in_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, FP32,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_TYPE);
}
void moments_bad_type_out_a_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, FP32, ZDNN_FORMAT_4DFEATURE,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_TYPE);
}
void moments_bad_type_out_b_fail() {
uint32_t input_a[] = {1, 2, 2, 1};
uint32_t output_a[] = {1, 1, 1, 1};
uint32_t output_b[] = {1, 1, 1, 1};
uint32_t bessel_correction = 0;
test_moments(input_a, bessel_correction, output_a, output_b, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
FP32, ZDNN_FORMAT_4DFEATURE, ZDNN_INVALID_TYPE);
}
/// Common test routine for layernorm tensors
///
/// \param[in] input_a_shape input a tensor shape
/// \param[in] input_b_shape input b tensor shape
/// \param[in] input_c_shape input c tensor shape
/// \param[in] beta beta fsp
/// \param[in] gamma gamma fsp
/// \param[in] epsilon epsilon fsp
/// \param[in] output_shape output tensor shape
/// \param[in] exp_status Expected status
/// \param[in] ztensor_to_error Index of the descriptor to sabotage
///                             (0 = input a, 1 = input b, 2 = input c, 3 = output)
///
void test_layernorm(uint32_t input_a_shape[], uint32_t input_b_shape[],
uint32_t input_c_shape[], uint32_t output_shape[],
float beta_value, float gamma_value, float epsilon_value,
zdnn_status exp_status, int ztensor_to_error) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
zdnn_tensor_desc tfrmd_desc[4];
zdnn_ztensor input_a, input_b, input_c, output;
input_a.transformed_desc = &tfrmd_desc[0];
input_b.transformed_desc = &tfrmd_desc[1];
input_c.transformed_desc = &tfrmd_desc[2];
output.transformed_desc = &tfrmd_desc[3];
func_sp_parm1_layernorm layernorm_parm1;
memset(&layernorm_parm1, 0, sizeof(func_sp_parm1_layernorm));
if (beta_value != 0) {
layernorm_parm1.beta = cnvt_1_fp32_to_dlf16(beta_value);
}
func_sp_parm2_layernorm layernorm_parm2;
memset(&layernorm_parm2, 0, sizeof(func_sp_parm2_layernorm));
if (gamma_value != 0) {
layernorm_parm2.gamma = cnvt_1_fp32_to_dlf16(gamma_value);
}
func_sp_parm3_layernorm layernorm_parm3;
memset(&layernorm_parm3, 0, sizeof(func_sp_parm3_layernorm));
if (epsilon_value != 0) {
layernorm_parm3.epsilon = cnvt_1_fp32_to_dlf16(epsilon_value);
}
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[0], input_a_shape[0], input_a_shape[1],
input_a_shape[2], input_a_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[1], input_b_shape[0], input_b_shape[1],
input_b_shape[2], input_b_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[2], input_c_shape[0], input_c_shape[1],
input_c_shape[2], input_c_shape[3]);
init_transformed_desc(ZDNN_NHWC, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE,
&tfrmd_desc[3], output_shape[0], output_shape[1],
output_shape[2], output_shape[3]);
if (exp_status == ZDNN_INVALID_TYPE) {
// cppcheck-suppress unreadVariable
tfrmd_desc[ztensor_to_error].type = FP32;
}
if (exp_status == ZDNN_INVALID_FORMAT) {
// cppcheck-suppress unreadVariable
tfrmd_desc[ztensor_to_error].format = ZDNN_FORMAT_4DKERNEL;
}
zdnn_status status =
verify_layernorm_tensors(&input_a, &input_b, &input_c, &layernorm_parm1,
&layernorm_parm2, &layernorm_parm3, &output);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
}
void layernorm_verify_pass() {
// Trivial correct input and output shape test to pass.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_OK, 0);
}
void layernorm_verify_bad_beta_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 22147483648;
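// Illustrative note: 22147483648 appears to be chosen to exceed the DLFLOAT16
// range, so the converted beta parm is presumably rejected as ZDNN_INVALID_BETA.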
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_BETA, 0);
}
void layernorm_verify_bad_gamma_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 22147483648;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_GAMMA, 0);
}
void layernorm_verify_bad_epsilon_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 22147483648;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_EPSILON, 0);
}
//
// Input A
//
void layernorm_verify_input_a_bad_dim1_fail() {
uint32_t shape_a[] = {1, 1, 1, 10};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_a_bad_dim2_fail() {
uint32_t shape_a[] = {1, 1, 40, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_a_bad_dim3_fail() {
uint32_t shape_a[] = {1, 16, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
//
// Input B
//
void layernorm_verify_input_b_bad_dim1_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_b[] = {1, 1, 1, 5};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_b_bad_dim2_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 5, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_b_bad_dim3_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 5, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
//
// Input C
//
void layernorm_verify_input_c_bad_dim1_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 5};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_c_bad_dim2_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 5, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_input_c_bad_dim3_fail() {
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 5, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
//
// Output
//
void layernorm_verify_bad_dim4_fail() {
uint32_t shape_a[] = {19, 1, 1, 1};
uint32_t shape_b[] = {18, 1, 1, 1};
uint32_t shape_c[] = {17, 1, 1, 1};
uint32_t shape_o[] = {16, 1, 1, 1};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_SHAPE, 0);
}
void layernorm_verify_bad_inputa_type_fail() {
// Shapes are valid; test_layernorm() sabotages the input a type to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_TYPE, 0);
}
void layernorm_verify_bad_inputb_type_fail() {
// Shapes are valid; test_layernorm() sabotages the input b type to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_TYPE, 1);
}
void layernorm_verify_bad_inputc_type_fail() {
// Shapes are valid; test_layernorm() sabotages the input c type to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_TYPE, 2);
}
void layernorm_verify_bad_output_type_fail() {
// Shapes are valid; test_layernorm() sabotages the output type to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_TYPE, 3);
}
void layernorm_verify_bad_inputa_format_fail() {
// Shapes are valid; test_layernorm() sabotages the input a format to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_FORMAT, 0);
}
void layernorm_verify_bad_inputb_format_fail() {
// Shapes are valid; test_layernorm() sabotages the input b format to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_FORMAT, 1);
}
void layernorm_verify_bad_inputc_format_fail() {
// Shapes are valid; test_layernorm() sabotages the input c format to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_FORMAT, 2);
}
void layernorm_verify_bad_output_format_fail() {
// Shapes are valid; test_layernorm() sabotages the output format to force failure.
uint32_t shape_a[] = {1, 1, 1, 6};
uint32_t shape_b[] = {1, 1, 1, 1};
uint32_t shape_c[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 6};
float beta = 0.02;
float gamma = 0.05;
float epsilon = 0.01;
test_layernorm(shape_a, shape_b, shape_c, shape_o, beta, gamma, epsilon,
ZDNN_INVALID_FORMAT, 3);
}
/// Common test routine for reduce tensors
///
/// \param[in] input_shape Pointer to input dim array
/// \param[in] input_format Input format
/// \param[in] input_type Input type
/// \param[in] output_shape Pointer to output dim array
/// \param[in] output_format Output format
/// \param[in] output_type Output type
/// \param[in] exp_status Expected status
/// \param[in] error_msg Error message to prepend to the standard error
/// message
///
void test_reduce(uint32_t input_shape[], zdnn_data_formats input_format,
zdnn_data_types input_type, uint32_t output_shape[],
zdnn_data_formats output_format, zdnn_data_types output_type,
zdnn_status exp_status, char *error_msg) {
zdnn_status status = ZDNN_OK;
zdnn_ztensor input, output;
zdnn_tensor_desc tfrmd_desc_input, tfrmd_desc_output;
input.transformed_desc = &tfrmd_desc_input;
output.transformed_desc = &tfrmd_desc_output;
init_transformed_desc(ZDNN_NHWC, input_type, input_format,
input.transformed_desc, input_shape[0], input_shape[1],
input_shape[2], input_shape[3]);
init_transformed_desc(ZDNN_NHWC, output_type, output_format,
output.transformed_desc, output_shape[0],
output_shape[1], output_shape[2], output_shape[3]);
status = verify_reduce_tensors(&input, &output);
TEST_ASSERT_MESSAGE_FORMATTED(
exp_status == status, "%s Expected status = %08x, actual status = %08x",
error_msg, exp_status, status);
}
void reduce_verify_pass() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 1};
test_reduce(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_OK,
"The output and the input tensor is different.");
}
void reduce_verify_fail_shape() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 3};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_reduce(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void reduce_verify_fail_format() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 1};
test_reduce(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, output_shape,
ZDNN_FORMAT_4DKERNEL, ZDNN_DLFLOAT16, ZDNN_INVALID_FORMAT,
"Failed to fail on different formats.");
}
void reduce_verify_fail_dtype() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 1};
test_reduce(input_shape, ZDNN_FORMAT_4DFEATURE, FP32, output_shape,
ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16, ZDNN_INVALID_TYPE,
"Failed to fail on different types.");
}
/// Common test routine for invsqrt tensors
///
/// \param[in] input_shape Input tensor shape
/// \param[in] input_type Input data type
/// \param[in] input_format Input format
/// \param[in] output_shape Output tensor shape
/// \param[in] output_type Output data type
/// \param[in] output_format Output format
/// \param[in] epsilon Epsilon fsp
/// \param[in] exp_status Expected status
///
void test_invsqrt(uint32_t input_shape[], zdnn_data_types input_type,
zdnn_data_formats input_format, uint32_t output_shape[],
zdnn_data_types output_type, zdnn_data_formats output_format,
float epsilon, zdnn_status exp_status) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
zdnn_tensor_desc tfrmd_desc_input, tfrmd_desc_output;
zdnn_ztensor input, output;
input.transformed_desc = &tfrmd_desc_input;
output.transformed_desc = &tfrmd_desc_output;
func_sp_parm1_invsqrt invsqrt_parm1;
memset(&invsqrt_parm1, 0, sizeof(func_sp_parm1_invsqrt));
if (epsilon != 0) {
invsqrt_parm1.epsilon = cnvt_1_fp32_to_dlf16(epsilon);
}
init_transformed_desc(ZDNN_NHWC, input_type, input_format, &tfrmd_desc_input,
input_shape[0], input_shape[1], input_shape[2],
input_shape[3]);
init_transformed_desc(ZDNN_NHWC, output_type, output_format,
&tfrmd_desc_output, output_shape[0], output_shape[1],
output_shape[2], output_shape[3]);
zdnn_status status = verify_invsqrt_tensors(&input, &invsqrt_parm1, &output);
TEST_ASSERT_MESSAGE_FORMATTED(exp_status == status,
"Expected status = %08x, actual status = %08x",
exp_status, status);
}
void invsqrt_verify_pass() {
// Trivial correct input and output shape test to pass.
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon, ZDNN_OK);
}
void invsqrt_verify_bad_epsilon_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 22147483648;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_EPSILON);
}
void invsqrt_verify_input_bad_dim1_fail() {
uint32_t shape_a[] = {1, 1, 1, 2};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_input_bad_dim2_fail() {
uint32_t shape_a[] = {1, 1, 2, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_input_bad_dim3_fail() {
uint32_t shape_a[] = {1, 2, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_input_bad_dim4_fail() {
uint32_t shape_a[] = {2, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_output_bad_dim1_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 2};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_output_bad_dim2_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 2, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_output_bad_dim3_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 2, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_output_bad_dim4_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {2, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_SHAPE);
}
void invsqrt_verify_input_bad_layout_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, FP32, ZDNN_FORMAT_4DFEATURE, shape_o, ZDNN_DLFLOAT16,
ZDNN_FORMAT_4DFEATURE, epsilon, ZDNN_INVALID_TYPE);
}
void invsqrt_verify_input_bad_format_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DKERNEL, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, epsilon,
ZDNN_INVALID_FORMAT);
}
void invsqrt_verify_output_bad_layout_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o, FP32,
ZDNN_FORMAT_4DFEATURE, epsilon, ZDNN_INVALID_TYPE);
}
void invsqrt_verify_output_bad_format_fail() {
uint32_t shape_a[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 1, 1};
float epsilon = 0.01;
test_invsqrt(shape_a, ZDNN_DLFLOAT16, ZDNN_FORMAT_4DFEATURE, shape_o,
ZDNN_DLFLOAT16, ZDNN_FORMAT_4DKERNEL, epsilon,
ZDNN_INVALID_FORMAT);
}
int main() {
UNITY_BEGIN();
RUN_TEST(verify_ztensor_format);
RUN_TEST(verify_1input_pass);
RUN_TEST(verify_2input_pass);
RUN_TEST(verify_3input_pass);
RUN_TEST(verify_input2_fail_shape);
RUN_TEST(verify_input3_fail_shape);
RUN_TEST(verify_input2_fail_format);
RUN_TEST(verify_input3_fail_format);
RUN_TEST(verify_input2_fail_dtype);
RUN_TEST(verify_input3_fail_dtype);
RUN_TEST(verify_output_fail_shape);
RUN_TEST(verify_output_fail_format);
RUN_TEST(verify_output_fail_dtype);
RUN_TEST(verify_matmul_op_pass);
RUN_TEST(verify_matmul_op_fail_output_shape);
RUN_TEST(verify_matmul_op_fail_input_shape);
RUN_TEST(verify_matmul_op_fail_output_format);
RUN_TEST(verify_matmul_op_fail_input_format);
RUN_TEST(verify_matmul_op_fail_output_type);
RUN_TEST(verify_matmul_op_fail_input_type);
RUN_TEST(verify_matmul_bcast_op_pass);
RUN_TEST(verify_matmul_bcast_op_fail_output_shape);
RUN_TEST(verify_matmul_bcast_op_fail_input_shape);
RUN_TEST(verify_matmul_bcast_op_fail_output_format);
RUN_TEST(verify_matmul_bcast_op_fail_input_format);
RUN_TEST(verify_matmul_bcast_op_fail_output_type);
RUN_TEST(verify_matmul_bcast_op_fail_input_type);
RUN_TEST(batchnorm_verify_pass);
RUN_TEST(batchnorm_verify_input_b_bad_dim2_fail);
RUN_TEST(batchnorm_verify_input_b_bad_dim1_fail);
RUN_TEST(batchnorm_verify_input_c_bad_dim2_fail);
RUN_TEST(batchnorm_verify_input_c_bad_dim1_fail);
RUN_TEST(relu_verify_pass);
RUN_TEST(relu_verify_fail_shape);
RUN_TEST(relu_verify_fail_format);
RUN_TEST(relu_verify_fail_dtype);
RUN_TEST(norm_verify_pass);
RUN_TEST(norm_verify_input_bad_dim4_fail);
RUN_TEST(norm_verify_input_bad_dim3_fail);
RUN_TEST(norm_verify_input_bad_dim2_fail);
RUN_TEST(norm_verify_input_bad_dim1_fail);
RUN_TEST(norm_verify_output_bad_dim1_fail);
RUN_TEST(norm_verify_bad_inputa_type_fail);
RUN_TEST(norm_verify_bad_inputb_type_fail);
RUN_TEST(norm_verify_bad_output_type_fail);
RUN_TEST(norm_verify_bad_inputa_format_fail);
RUN_TEST(norm_verify_bad_inputb_format_fail);
RUN_TEST(norm_verify_bad_output_format_fail);
RUN_TEST(moments_verify_pass);
RUN_TEST(moments_bad_bessel_correction);
RUN_TEST(moments_bad_out_a_dim4_fail);
RUN_TEST(moments_bad_out_a_dim3_fail);
RUN_TEST(moments_bad_out_a_dim2_fail);
RUN_TEST(moments_bad_out_a_dim1_fail);
RUN_TEST(moments_bad_out_b_dim4_fail);
RUN_TEST(moments_bad_out_b_dim3_fail);
RUN_TEST(moments_bad_out_b_dim2_fail);
RUN_TEST(moments_bad_out_b_dim1_fail);
RUN_TEST(moments_bad_format_in_fail);
RUN_TEST(moments_bad_format_out_a_fail);
RUN_TEST(moments_bad_format_out_b_fail);
RUN_TEST(moments_bad_type_in_fail);
RUN_TEST(moments_bad_type_out_a_fail);
RUN_TEST(moments_bad_type_out_b_fail);
RUN_TEST(layernorm_verify_pass);
RUN_TEST(layernorm_verify_bad_beta_fail);
RUN_TEST(layernorm_verify_bad_gamma_fail);
RUN_TEST(layernorm_verify_bad_epsilon_fail);
RUN_TEST(layernorm_verify_input_a_bad_dim1_fail);
RUN_TEST(layernorm_verify_input_a_bad_dim2_fail);
RUN_TEST(layernorm_verify_input_a_bad_dim3_fail);
RUN_TEST(layernorm_verify_input_b_bad_dim1_fail);
RUN_TEST(layernorm_verify_input_b_bad_dim2_fail);
RUN_TEST(layernorm_verify_input_b_bad_dim3_fail);
RUN_TEST(layernorm_verify_input_c_bad_dim1_fail);
RUN_TEST(layernorm_verify_input_c_bad_dim2_fail);
RUN_TEST(layernorm_verify_input_c_bad_dim3_fail);
RUN_TEST(layernorm_verify_bad_dim4_fail);
RUN_TEST(layernorm_verify_bad_inputa_type_fail);
RUN_TEST(layernorm_verify_bad_inputb_type_fail);
RUN_TEST(layernorm_verify_bad_inputc_type_fail);
RUN_TEST(layernorm_verify_bad_output_type_fail);
RUN_TEST(layernorm_verify_bad_inputa_format_fail);
RUN_TEST(layernorm_verify_bad_inputb_format_fail);
RUN_TEST(layernorm_verify_bad_inputc_format_fail);
RUN_TEST(layernorm_verify_bad_output_format_fail);
RUN_TEST(reduce_verify_pass);
RUN_TEST(reduce_verify_fail_shape);
RUN_TEST(reduce_verify_fail_format);
RUN_TEST(reduce_verify_fail_dtype);
RUN_TEST(invsqrt_verify_pass);
RUN_TEST(invsqrt_verify_bad_epsilon_fail);
RUN_TEST(invsqrt_verify_input_bad_dim1_fail);
RUN_TEST(invsqrt_verify_input_bad_dim2_fail);
RUN_TEST(invsqrt_verify_input_bad_dim3_fail);
RUN_TEST(invsqrt_verify_input_bad_dim4_fail);
RUN_TEST(invsqrt_verify_output_bad_dim1_fail);
RUN_TEST(invsqrt_verify_output_bad_dim2_fail);
RUN_TEST(invsqrt_verify_output_bad_dim3_fail);
RUN_TEST(invsqrt_verify_output_bad_dim4_fail);
RUN_TEST(invsqrt_verify_input_bad_layout_fail);
RUN_TEST(invsqrt_verify_input_bad_format_fail);
RUN_TEST(invsqrt_verify_output_bad_layout_fail);
RUN_TEST(invsqrt_verify_output_bad_format_fail);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify_conv2d.c 0000664 0000000 0000000 00000045357 15000221702 0022221 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <string.h>
// struct for tensor information
typedef struct tensor_info {
uint32_t dims[ZDNN_MAX_DIMS];
zdnn_data_layouts layout;
zdnn_data_types dtype;
} tensor_info;
// struct for a set of inputs for a testcase (padding + tensors + strides)
typedef struct input_set {
zdnn_pool_padding padding;
tensor_info input;
tensor_info kernel;
tensor_info bias;
tensor_info output;
uint32_t stride_height;
uint32_t stride_width;
} input_set;
// "good input sets" - initialized during setUp(), shall NOT be modified by
// testcases afterwards
input_set same_padding_nonzero_stride;
input_set valid_padding_nonzero_stride;
input_set valid_padding_zero_stride;
#define DIM4 dims[0]
#define DIM3 dims[1]
#define DIM2 dims[2]
#define DIM1 dims[3]
#define INIT_TENSOR(set, info, dim4, dim3, dim2, dim1, l, t) \
set.info.DIM4 = dim4; \
set.info.DIM3 = dim3; \
set.info.DIM2 = dim2; \
set.info.DIM1 = dim1; \
set.info.layout = l; \
set.info.dtype = t;
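// Illustrative expansion: INIT_TENSOR(s, input, 4, 6, 9, 5, ZDNN_NHWC, FP32)
// assigns s.input.dims[0..3] = {4, 6, 9, 5}, s.input.layout = ZDNN_NHWC and
// s.input.dtype = FP32, via the DIM4..DIM1 aliases defined above.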
void setUp(void) {
VERIFY_HW_ENV;
same_padding_nonzero_stride.padding = SAME_PADDING;
INIT_TENSOR(same_padding_nonzero_stride, input, 4, 6, 9, 5, ZDNN_NHWC, FP32);
INIT_TENSOR(same_padding_nonzero_stride, kernel, 3, 8, 5, 8, ZDNN_HWCK, FP32);
INIT_TENSOR(same_padding_nonzero_stride, bias, 8, -1, -1, -1, ZDNN_1D,
FP32); // -1 are ignored since it's 1D
INIT_TENSOR(same_padding_nonzero_stride, output, 4, 2, 5, 8, ZDNN_NHWC, FP32);
same_padding_nonzero_stride.stride_height = 3;
same_padding_nonzero_stride.stride_width = 2;
valid_padding_nonzero_stride.padding = VALID_PADDING;
INIT_TENSOR(valid_padding_nonzero_stride, input, 4, 6, 9, 5, ZDNN_NHWC, FP32);
INIT_TENSOR(valid_padding_nonzero_stride, kernel, 3, 8, 5, 8, ZDNN_HWCK,
FP32);
INIT_TENSOR(valid_padding_nonzero_stride, bias, 8, -1, -1, -1, ZDNN_1D, FP32);
INIT_TENSOR(valid_padding_nonzero_stride, output, 4, 2, 1, 8, ZDNN_NHWC,
FP32);
valid_padding_nonzero_stride.stride_height = 3;
valid_padding_nonzero_stride.stride_width = 2;
valid_padding_zero_stride.padding = VALID_PADDING;
INIT_TENSOR(valid_padding_zero_stride, input, 4, 3, 8, 5, ZDNN_NHWC, FP32);
INIT_TENSOR(valid_padding_zero_stride, kernel, 3, 8, 5, 8, ZDNN_HWCK, FP32);
INIT_TENSOR(valid_padding_zero_stride, bias, 8, -1, -1, -1, ZDNN_1D, FP32);
INIT_TENSOR(valid_padding_zero_stride, output, 4, 1, 1, 8, ZDNN_NHWC, FP32);
valid_padding_zero_stride.stride_height = 0;
valid_padding_zero_stride.stride_width = 0;
}
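// Expected-output arithmetic for the "good" sets above (illustrative only;
// input is NHWC {N, H, W, C}, kernel is HWCK {3, 8, 5, 8}, i.e. a 3x8 window
// over 5 channels producing 8 output channels):
//   same_padding_nonzero_stride:  out H = ceil(6/3) = 2, out W = ceil(9/2) = 5
//                                 -> output {4, 2, 5, 8}
//   valid_padding_nonzero_stride: out H = ceil((6-3+1)/3) = 2,
//                                 out W = ceil((9-8+1)/2) = 1
//                                 -> output {4, 2, 1, 8}
//   valid_padding_zero_stride:    input H/W (3, 8) equal kernel H/W (3, 8)
//                                 -> output {4, 1, 1, 8}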
void tearDown(void) {}
#define NON_EXISTENT_FORMAT -1
#define NON_EXISTENT_DTYPE -1
void run_verify_conv2d_tensors_full(input_set set, zdnn_conv2d_act act_func,
bool use_non_existent_format,
bool use_non_existent_dtype,
zdnn_status expected_status) {
zdnn_status status = GENERAL_TESTCASE_FAILURE;
zdnn_ztensor *input_ztensor =
alloc_ztensor_with_values(set.input.dims, set.input.layout,
set.input.dtype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor =
alloc_ztensor_with_values(set.kernel.dims, set.kernel.layout,
set.kernel.dtype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor =
alloc_ztensor_with_values(set.bias.dims, set.bias.layout, set.bias.dtype,
NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor =
alloc_ztensor_with_values(set.output.dims, set.output.layout,
set.output.dtype, NO_CONCAT, true, ZERO_ARRAY);
if (use_non_existent_dtype) {
output_ztensor->transformed_desc->type = NON_EXISTENT_DTYPE;
}
if (use_non_existent_format) {
output_ztensor->transformed_desc->format = NON_EXISTENT_FORMAT;
}
func_sp_parm1_conv2d conv2d_parm1;
memset(&conv2d_parm1, 0, sizeof(func_sp_parm1_conv2d));
conv2d_parm1.act = act_func;
conv2d_parm1.pad = set.padding;
func_sp_parm2_conv2d conv2d_parm2;
memset(&conv2d_parm2, 0, sizeof(func_sp_parm2_conv2d));
conv2d_parm2.stride_width = set.stride_width;
func_sp_parm3_conv2d conv2d_parm3;
memset(&conv2d_parm3, 0, sizeof(func_sp_parm3_conv2d));
conv2d_parm3.stride_height = set.stride_height;
func_sp_parm4_conv2d conv2d_parm4;
memset(&conv2d_parm4, 0, sizeof(func_sp_parm4_conv2d));
conv2d_parm4.clipping_value = 0;
// Make call to verify with our newly created ztensors and other inputs
status = verify_conv2d_tensors(input_ztensor, kernel_ztensor, bias_ztensor,
                               &conv2d_parm1, &conv2d_parm2, &conv2d_parm3,
                               &conv2d_parm4, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
    status == expected_status,
    "Call to verify_conv2d_tensors() returned zdnn_status %08x but we "
    "expected %08x",
    status, expected_status);
free_ztensor_buffers(4, input_ztensor, kernel_ztensor, bias_ztensor,
output_ztensor);
}
void run_verify_conv2d_tensors(input_set set, zdnn_conv2d_act act_func,
zdnn_status expected_status) {
run_verify_conv2d_tensors_full(set, act_func, false, false, expected_status);
}
void same_padding_pass() {
input_set set;
memcpy(&set, &same_padding_nonzero_stride, sizeof(input_set));
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_OK);
}
void valid_padding_pass() {
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_OK);
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_OK);
}
// although actual op would fail, tensor-verify would pass
void unknown_padding_type_pass() {
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
set.padding = -1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_OK);
}
void output_different_dtype_fail() {
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
run_verify_conv2d_tensors_full(set[i], CONV2D_ACT_NONE, false, true,
ZDNN_INVALID_TYPE);
}
}
void output_different_format_fail() {
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
run_verify_conv2d_tensors_full(set[i], CONV2D_ACT_NONE, true, false,
ZDNN_INVALID_FORMAT);
}
}
void bias_not_bias_fail() {
/*
The dimension-2, dimension-3, and dimension-4
index sizes of the input 3 tensor must be 1.
*/
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].bias.dims[0] = 2;
set[i].bias.dims[1] = 8;
set[i].bias.layout = ZDNN_2D;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
}
void different_output_dim4_input_dim4_fail() {
/*
The dimension-4-index-size of the output tensor must be equal to the
dimension-4-index-size of the input 1 tensor.
*/
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].output.DIM4 = set[i].input.DIM4 + 1;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
}
void different_output_dim1_input2_dim1_fail() {
/*
The dimension-1 index size of the output tensor must be equal to the
dimension-1 index size of the input 2 tensor and the dimension-1-index size of
the input 3 tensor.
*/
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].output.DIM1 = set[i].kernel.DIM1 + 1;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
}
void different_output_dim1_input3_dim1_fail() {
/*
The dimension-1 index size of the output tensor must be equal to the
dimension-1 index size of the input 2 tensor and the dimension-1-index size of
the input 3 tensor.
*/
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
// bias is 1D so dimension-1-index came from dims[0]
set[i].output.DIM1 = set[i].bias.dims[0] + 1;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
}
void different_input_dim1_input2_dim2_fail() {
/*
The dimension-1 index size of the input 1 tensor must be equal to the
dimension-2 index size of the input 2 tensor.
*/
input_set set[3];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 2, &valid_padding_zero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].input.DIM1 = set[i].kernel.DIM2 + 1;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
}
/*****************************************************
If the dimension-2-stride and the dimension-3-
stride are both zero all of the following additional
conditions must be true:
*****************************************************/
void different_input1_dim2_input2_dim3_fail() {
/*
The input 1 tensor dimension-2-index-size must be equal to the
dimension-3-index-size of input 2 tensor.
*/
input_set set;
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
set.kernel.DIM3 = set.input.DIM2 + 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void different_input1_dim3_input2_dim4_fail() {
/*
The dimension-3-index-size of the input 1 tensor must be equal to the
dimension-4-index-size of the input 2 tensor.
*/
input_set set;
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
set.kernel.DIM4 = set.input.DIM3 + 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void output_dim2_not_one_fail() {
/*
The dimension-2-index-size and the dimension-3-index-size of the output tensor
must be one.
*/
input_set set;
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
set.output.DIM2 = 2;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void output_dim3_not_one_fail() {
/*
The dimension-2-index-size and the dimension-3-index-size of the output tensor
must be one.
*/
input_set set;
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
set.output.DIM3 = 2;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void zero_height_width_not_validpadding_fail() {
/*
The specified padding must be VALID.
*/
input_set set;
memcpy(&set, &valid_padding_zero_stride, sizeof(input_set));
set.padding = SAME_PADDING;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_STRIDE_PADDING);
}
/*********************************************
If the dimension-2-stride and the dimension-3-
stride are both greater than zero all of the
following additional conditions must be true:
*********************************************/
void valid_input_dim2_lessthan_kernel_dim3_fail() {
/*
When the specified padding is VALID, the dimension-2-index-size of the input 1
tensor must be greater than or equal to the dimension-3-index-size of input
tensor 2.
*/
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
set.input.DIM2 = set.kernel.DIM3 - 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void valid_input_dim3_lessthan_kernel_dim4_fail() {
/*
When the specified padding is VALID, the dimension-3-index-size of the input 1
tensor must be greater than or equal to the dimension-4-index-size of the
input 2 tensor.
*/
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
set.input.DIM3 = set.kernel.DIM4 - 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void same_big_math_equation1_fail() {
/*
When the specified padding is SAME, the following relationship between the
dimension-2-index-size and dimension-3-index-size of the input 1 tensor and
output tensor must be satisfied:
Dimension-2-index-size of the output tensor = ceil( Dimension-2-index-size
of the input 1 tensor / Dimension-2-stride)
Dimension-3-index-size of the output tensor = ceil( Dimension-3-index-size
of the input 1 tensor / Dimension-3-stride)
*/
input_set set;
memcpy(&set, &same_padding_nonzero_stride, sizeof(input_set));
set.stride_width = 1;
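// Illustrative arithmetic: with stride_width forced to 1, SAME padding
// expects out W = ceil(9 / 1) = 9, but the output tensor from setUp() still
// has W = 5, so verification should report ZDNN_INVALID_SHAPE.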
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void same_big_math_equation2_fail() {
/*
When the specified padding is SAME, the following relationship between the
dimension-2-index-size and dimension-3-index-size of the input 1 tensor and
output tensor must be satisfied:
Dimension-2-index-size of the output tensor = ceil( Dimension-2-index-size
of the input 1 tensor / Dimension-2-stride)
Dimension-3-index-size of the output tensor = ceil( Dimension-3-index-size
of the input 1 tensor / Dimension-3-stride)
*/
input_set set;
memcpy(&set, &same_padding_nonzero_stride, sizeof(input_set));
set.stride_height = 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void valid_big_math_equation1_fail() {
/*
When the specified padding is VALID, the following relationship between the
dimension-2-index-size and dimension-3-index-sizes of the input 1 tensor,
dimension-3-index-size and dimension-4-index-size of the input 2 tensor and
output tensor must be satisfied:
Dimension-2-index-size of the output tensor = ceil(
(Dimension-2-index-size of the input 1 tensor - Dimension-3-index-size of
the input 2 tensor + 1 ) / Dimension-2-stride
Dimension-3-index-size of the output tensor = ceil(
(Dimension-3-index-size of the input 1 tensor - Dimension-4-index-size of
the input 2 tensor + 1 ) / Dimension-3-stride
*/
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
set.stride_width = 1;
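// Illustrative arithmetic: with stride_width forced to 1, VALID padding
// expects out W = ceil((9 - 8 + 1) / 1) = 2, but the output tensor from
// setUp() still has W = 1, so verification should report ZDNN_INVALID_SHAPE.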
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void valid_big_math_equation2_fail() {
/*
When the specified padding is VALID, the following relationship between the
dimension-2-index-size and dimension-3-index-sizes of the input 1 tensor,
dimension-3-index-size and dimension-4-index-size of the input 2 tensor and
output tensor must be satisfied:
Dimension-2-index-size of the output tensor = ceil(
(Dimension-2-index-size of the input 1 tensor - Dimension-3-index-size of
the input 2 tensor + 1 ) / Dimension-2-stride
Dimension-3-index-size of the output tensor = ceil(
(Dimension-3-index-size of the input 1 tensor - Dimension-4-index-size of
the input 2 tensor + 1 ) / Dimension-3-stride
*/
input_set set;
memcpy(&set, &valid_padding_nonzero_stride, sizeof(input_set));
set.stride_height = 1;
run_verify_conv2d_tensors(set, CONV2D_ACT_NONE, ZDNN_INVALID_SHAPE);
}
void height_zero_width_nonzero_fail() {
/*
If either the dimension-2-stride or the dimension-3-stride is non-zero, then
both strides must be non-zero.
*/
input_set set[2];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].stride_height = 0;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_STRIDES);
}
}
void height_nonzero_width_zero_fail() {
/*
If either the dimension-2-stride or the dimension-3-stride is non-zero, then
both strides must be non-zero.
*/
input_set set[2];
memcpy(set, &same_padding_nonzero_stride, sizeof(input_set));
memcpy(set + 1, &valid_padding_nonzero_stride, sizeof(input_set));
for (int i = 0; i < sizeof(set) / sizeof(input_set); i++) {
set[i].stride_width = 0;
run_verify_conv2d_tensors(set[i], CONV2D_ACT_NONE, ZDNN_INVALID_STRIDES);
}
}
int main() {
UNITY_BEGIN();
RUN_TEST(same_padding_pass);
RUN_TEST(valid_padding_pass);
RUN_TEST(unknown_padding_type_pass);
RUN_TEST(output_different_dtype_fail);
RUN_TEST(output_different_format_fail);
RUN_TEST(bias_not_bias_fail);
RUN_TEST(different_output_dim4_input_dim4_fail);
RUN_TEST(different_output_dim1_input2_dim1_fail);
RUN_TEST(different_output_dim1_input3_dim1_fail);
RUN_TEST(different_input_dim1_input2_dim2_fail);
RUN_TEST(different_input1_dim2_input2_dim3_fail);
RUN_TEST(different_input1_dim3_input2_dim4_fail);
RUN_TEST(output_dim2_not_one_fail);
RUN_TEST(output_dim3_not_one_fail);
RUN_TEST(zero_height_width_not_validpadding_fail);
RUN_TEST(valid_input_dim2_lessthan_kernel_dim3_fail);
RUN_TEST(valid_input_dim3_lessthan_kernel_dim4_fail);
RUN_TEST(same_big_math_equation1_fail);
RUN_TEST(same_big_math_equation2_fail);
RUN_TEST(valid_big_math_equation1_fail);
RUN_TEST(valid_big_math_equation2_fail);
RUN_TEST(height_zero_width_nonzero_fail);
RUN_TEST(height_nonzero_width_zero_fail);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify_lstm_gru.c 0000664 0000000 0000000 00000026456 15000221702 0022661 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_rnn.h"
#include "testsupport.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
#define BAD_FORMAT 255
#define BAD_TYPE 255
#define DEFAULT_NUM_TIMESTEPS 3
#define DEFAULT_NUM_BATCHES 4
#define DEFAULT_NUM_FEATURES 7
#define DEFAULT_NUM_HIDDEN 16
const uint32_t num_batches = DEFAULT_NUM_BATCHES;
const uint32_t num_hidden = DEFAULT_NUM_HIDDEN;
#define MAX_DESC_LEN 256
char msg[MAX_DESC_LEN];
typedef enum tensor_idx {
FUSED,
BIAS,
CELLSTATE,
OUTPUT,
OUTPUT2,
MAX_TENSOR_IDX,
NONE = MAX_TENSOR_IDX
} tensor_idx;
// roll our own instead of using get_func_code_num_gates() in case that one
// breaks
#define NUM_GATES(f) ((f == NNPA_LSTMACT) ? 4 : 3)
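// LSTM activation conventionally operates on 4 gates (input, forget, cell,
// output) while GRU uses 3 (update, reset, hidden), hence the 4-vs-3 split.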
void create_ztensors(uint8_t function_code, zdnn_ztensor **rnn_ztens) {
zdnn_data_layouts layout = ZDNN_NHWC;
zdnn_data_types dtype = FP32;
uint8_t num_gates = NUM_GATES(function_code);
// baseline dimensions with correct requirements
uint32_t *shape[MAX_TENSOR_IDX];
// create ztensors using transformed shape + ZDNN_NHWC to make the code
// simpler, so that we can loop through them all rather than dealing with
// different pre-transformed layouts etc.
shape[FUSED] = (uint32_t[]){num_gates, 1, num_batches, num_hidden};
shape[BIAS] = (uint32_t[]){num_gates, 1, num_batches, num_hidden};
shape[CELLSTATE] = (uint32_t[]){1, 1, num_batches, num_hidden};
shape[OUTPUT] = (uint32_t[]){1, 1, num_batches, num_hidden};
shape[OUTPUT2] =
shape[OUTPUT]; // they share the same shape, final timestep only
// rnn_ztens[FUSED] is the fused_ztensor split as a timestep
// rnn_ztens[BIAS] is the bias_add_ztensor that would be the result of the
// bias_add call within NNPA_LSTMACT function.
// rnn_ztens[CELLSTATE] is the cell state ztensor (only used in NNPA_LSTMACT)
// rnn_ztens[OUTPUT] is the result as output_ztensor1
// rnn_ztens[OUTPUT2] is the result as output_ztensor2
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
rnn_ztens[i] = alloc_ztensor_with_values(shape[i], layout, dtype, NO_CONCAT,
true, ZERO_ARRAY);
}
}
void set_dim(zdnn_tensor_desc *desc, uint8_t dim_idx, uint32_t value) {
switch (dim_idx) {
case (1):
desc->dim1 = value;
break;
case (2):
desc->dim2 = value;
break;
case (3):
desc->dim3 = value;
break;
case (4):
desc->dim4 = value;
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("%d is not a valid dim_idx to set.", dim_idx);
break;
}
}
// Verify return status by sabotaging a ztensor
void verify(uint8_t function_code, tensor_idx idx, bool sabotage_dim,
uint8_t dim_idx, uint32_t dim_val, bool sabotage_type,
zdnn_data_types type, bool sabotage_format,
zdnn_data_formats format, zdnn_status exp_status,
char *description) {
// Create the test tensors
zdnn_ztensor *rnn_ztens[MAX_TENSOR_IDX];
create_ztensors(function_code, rnn_ztens);
// Sabotage the dim/format/type of the ztensor specified in idx
if (idx != NONE) {
if (sabotage_dim) {
set_dim(rnn_ztens[idx]->transformed_desc, dim_idx, dim_val);
}
if (sabotage_type) {
rnn_ztens[idx]->transformed_desc->type = type;
}
if (sabotage_format) {
rnn_ztens[idx]->transformed_desc->format = format;
}
}
zdnn_status actual_status = verify_lstm_or_gru_act_tensors(
function_code, rnn_ztens[FUSED], rnn_ztens[BIAS], rnn_ztens[CELLSTATE],
rnn_ztens[OUTPUT], rnn_ztens[OUTPUT2]);
if (actual_status != exp_status) {
TEST_FAIL_MESSAGE_FORMATTED(
"%s: Actual status return (%08x) does not match expected (%08x).",
description, actual_status, exp_status);
}
// Cleanup
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
free_ztensor_buffers(1, rnn_ztens[i]);
}
}
// Verify return status by sabotaging the ztensor dimension
void verify_shape(uint8_t function_code, tensor_idx idx, uint8_t dim_idx,
uint32_t dim_val, zdnn_status exp_status, char *description) {
verify(function_code, idx, true, dim_idx, dim_val, false, 0, false, 0,
exp_status, description);
}
// Verify return status by sabotaging the ztensor data type
void verify_type(uint8_t function_code, tensor_idx idx, zdnn_data_types type,
zdnn_status exp_status, char *description) {
verify(function_code, idx, false, 0, 0, true, type, false, 0, exp_status,
description);
}
// Verify return status by sabotaging the ztensor format
void verify_format(uint8_t function_code, tensor_idx idx,
zdnn_data_formats format, zdnn_status exp_status,
char *description) {
verify(function_code, idx, false, 0, 0, false, 0, true, format, exp_status,
description);
}
#define TEST_DIM_VAL(tensor_idx, dim_idx, val, exp_status) \
snprintf(msg, MAX_DESC_LEN, "%s %s dim%s", __func__, \
get_function_code_str(act), #dim_idx); \
verify_shape(act, tensor_idx, dim_idx, val, exp_status, msg);
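// Illustrative expansion: TEST_DIM_VAL(OUTPUT, 4, 2, ZDNN_INVALID_SHAPE)
// formats msg as "<calling test function> <LSTM/GRU act string> dim4" and
// then calls verify_shape(act, OUTPUT, 4, 2, ZDNN_INVALID_SHAPE, msg).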
/*
* Test verification of valid activation tensors.
* All tensors will be built with acceptable properties.
*/
void verify_pass() {
// Expect no known error, no bad dims will be set
LOOP_LSTM_AND_GRU(act) { TEST_DIM_VAL(NONE, 0, 0, ZDNN_OK); }
}
/*
* Test verification of failed output shape.
* Correct shape is (1, 1, num_batches, num_hidden)
* All input tensors will have acceptable descriptors.
*/
void verify_fail_output_shape() {
LOOP_LSTM_AND_GRU(act) {
// Expect failure when output_ztensor dimension 4 (timestep) is not 1
TEST_DIM_VAL(OUTPUT, 4, 2, ZDNN_INVALID_SHAPE);
// Expect failure when output_ztensor dimension 3 is not 1
TEST_DIM_VAL(OUTPUT, 3, 2, ZDNN_INVALID_SHAPE);
// Expect failure when output_ztensor dimension 2 does not match num_batches
TEST_DIM_VAL(OUTPUT, 2, num_batches + 1, ZDNN_INVALID_SHAPE);
// Expect failure when output_ztensor dimension 1 does not match num_hidden
TEST_DIM_VAL(OUTPUT, 1, num_hidden + 1, ZDNN_INVALID_SHAPE);
}
}
/*
* Test verification of failed output2 shape.
* Correct shape is (1, 1, num_batches, num_hidden)
* All input tensors will have acceptable descriptors.
*/
void verify_fail_output2_shape() {
int act = NNPA_LSTMACT;
  // Expect failure when output2_ztensor dimension 4 (timestep) is not 1
  TEST_DIM_VAL(OUTPUT2, 4, 2, ZDNN_INVALID_SHAPE);
  // Expect failure when output2_ztensor dimension 3 is not 1
  TEST_DIM_VAL(OUTPUT2, 3, 2, ZDNN_INVALID_SHAPE);
  // Expect failure when output2_ztensor dimension 2 does not match num_batches
  TEST_DIM_VAL(OUTPUT2, 2, num_batches + 1, ZDNN_INVALID_SHAPE);
  // Expect failure when output2_ztensor dimension 1 does not match num_hidden
  TEST_DIM_VAL(OUTPUT2, 1, num_hidden + 1, ZDNN_INVALID_SHAPE);
}
/*
 * Test verification of failed fused_ztensor shape.
* Correct shape is (4, 1, num_batches, num_hidden) for LSTM,
* (3, 1, num_batches, num_hidden) for GRU
* All input tensors except fused will have acceptable descriptors.
*/
void verify_fail_fused_shape() {
LOOP_LSTM_AND_GRU(act) {
uint32_t num_gates = NUM_GATES(act);
    // Expect failure when fused dimension 4 is not 4 (LSTM) or 3 (GRU)
TEST_DIM_VAL(FUSED, 4, num_gates + 1, ZDNN_INVALID_SHAPE);
// Expect failure when fused dimension 3 is not 1
TEST_DIM_VAL(FUSED, 3, 2, ZDNN_INVALID_SHAPE);
// Expect failure when fused dimension 2 does not match num_batches
TEST_DIM_VAL(FUSED, 2, num_batches + 1, ZDNN_INVALID_SHAPE);
// Expect failure when fused dimension 1 does not match num_hidden
TEST_DIM_VAL(FUSED, 1, num_hidden + 1, ZDNN_INVALID_SHAPE);
}
}
/*
* Test verification of failed bias_add_ztensor shape.
* Correct shape is (4, 1, num_batches, num_hidden) for LSTM,
* (3, 1, num_batches, num_hidden) for GRU
* All input tensors except bias will have acceptable descriptors.
*/
void verify_fail_bias_shape() {
LOOP_LSTM_AND_GRU(act) {
uint32_t num_gates = NUM_GATES(act);
// Expect failure when bias dimension 4 is not 4 (LSTM) or 3 (GRU)
TEST_DIM_VAL(BIAS, 4, num_gates + 1, ZDNN_INVALID_SHAPE);
// Expect failure when bias dimension 3 is not 1
TEST_DIM_VAL(BIAS, 3, 2, ZDNN_INVALID_SHAPE);
    // Expect failure when bias dimension 2 does not match num_batches
TEST_DIM_VAL(BIAS, 2, num_batches + 1, ZDNN_INVALID_SHAPE);
    // Expect failure when bias dimension 1 does not match num_hidden
TEST_DIM_VAL(BIAS, 1, num_hidden + 1, ZDNN_INVALID_SHAPE);
}
}
/*
* Test verification of failed cell state ztensor shape.
* Correct shape is (1, 1, num_batches, num_hidden)
* All input tensors except cell-state will have acceptable descriptors.
*/
void verify_fail_cellstate_shape() {
LOOP_LSTM_AND_GRU(act) {
// Expect failure when cellstate dimension 4 is not 1
TEST_DIM_VAL(CELLSTATE, 4, 2, ZDNN_INVALID_SHAPE);
// Expect failure when cellstate dimension 3 is not 1
TEST_DIM_VAL(CELLSTATE, 3, 2, ZDNN_INVALID_SHAPE);
// Expect failure when cellstate dimension 2 does not match num_batches
TEST_DIM_VAL(CELLSTATE, 2, num_batches + 1, ZDNN_INVALID_SHAPE);
    // Expect failure when cellstate dimension 1 does not match num_hidden
TEST_DIM_VAL(CELLSTATE, 1, num_hidden + 1, ZDNN_INVALID_SHAPE);
}
}
#define TEST_FORMAT(tensor_idx, format, exp_status) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s", __func__, \
get_function_code_str(act), #tensor_idx); \
verify_format(act, tensor_idx, format, exp_status, msg);
/*
* Test verification of failed format.
*/
void verify_fail_format() {
LOOP_LSTM_AND_GRU(act) {
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
if (act == NNPA_GRUACT && (i == CELLSTATE || i == OUTPUT2))
continue;
TEST_FORMAT(i, BAD_FORMAT, ZDNN_INVALID_FORMAT);
}
}
}
#define TEST_TYPE(tensor_idx, type, exp_status) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s", __func__, \
get_function_code_str(act), #tensor_idx); \
verify_type(act, tensor_idx, type, exp_status, msg);
/*
* Test verification of failed type.
*/
void verify_fail_type() {
LOOP_LSTM_AND_GRU(act) {
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
if (act == NNPA_GRUACT && (i == CELLSTATE || i == OUTPUT2))
continue;
TEST_TYPE(i, BAD_TYPE, ZDNN_INVALID_TYPE);
}
}
}
int main() {
UNITY_BEGIN();
RUN_TEST(verify_pass);
RUN_TEST(verify_fail_output_shape);
RUN_TEST(verify_fail_output2_shape);
RUN_TEST(verify_fail_fused_shape);
RUN_TEST(verify_fail_bias_shape);
RUN_TEST(verify_fail_cellstate_shape);
RUN_TEST(verify_fail_format);
RUN_TEST(verify_fail_type);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify_pool_avg_max.c 0000664 0000000 0000000 00000045403 15000221702 0023471 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_pool.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
#define NON_EXISTENT_FORMAT -1
#define NON_EXISTENT_DTYPE -1
void run_verify_pool_avg_max_tensors(
uint32_t *input_shape, zdnn_data_layouts input_layout,
zdnn_data_types input_dtype, uint32_t *output_shape,
zdnn_data_layouts output_layout, zdnn_data_types output_dtype,
zdnn_pool_padding padding_type, uint32_t kernel_height,
uint32_t kernel_width, uint32_t stride_height, uint32_t stride_width,
bool use_mismatch_dtype, zdnn_status expected_status) {
// Create status to check status after verify calls
zdnn_status status;
// We don't care about the values for these tests so just pass the zero array
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_shape, input_layout, input_dtype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, output_layout, output_dtype, NO_CONCAT, true, ZERO_ARRAY);
// Special scenario. Test is checking what happens when input and output data
// types don't match. alloc_ztensor_with_values() above transforms into real
// ztensors, with ZDNN_DLFLOAT16. Forcibly break that for such tests.
if (use_mismatch_dtype) {
input_ztensor->transformed_desc->type = NON_EXISTENT_DTYPE;
}
func_sp_parm1_pool2d pool2d_parm1;
memset(&pool2d_parm1, 0, sizeof(func_sp_parm1_pool2d));
pool2d_parm1.pad = padding_type;
func_sp_parm2_pool2d pool2d_parm2;
memset(&pool2d_parm2, 0, sizeof(func_sp_parm2_pool2d));
pool2d_parm2.stride_width = stride_width;
func_sp_parm3_pool2d pool2d_parm3;
memset(&pool2d_parm3, 0, sizeof(func_sp_parm3_pool2d));
pool2d_parm3.stride_height = stride_height;
func_sp_parm4_pool2d pool2d_parm4;
memset(&pool2d_parm4, 0, sizeof(func_sp_parm4_pool2d));
pool2d_parm4.kernel_width = kernel_width;
func_sp_parm5_pool2d pool2d_parm5;
memset(&pool2d_parm5, 0, sizeof(func_sp_parm5_pool2d));
pool2d_parm5.kernel_height = kernel_height;
// Make call to verify with our newly created ztensors and other inputs
if ((status = verify_pool_avg_max_tensors(
input_ztensor, &pool2d_parm1, &pool2d_parm2, &pool2d_parm3,
&pool2d_parm4, &pool2d_parm5, output_ztensor)) != expected_status) {
TEST_FAIL_MESSAGE_FORMATTED(
"Call to verify_pool_avg_max_tensors() returned zdnn_status %08x "
"\"%s\" but we expected %08x \"%s\"",
status, zdnn_get_status_message(status), expected_status,
zdnn_get_status_message(expected_status));
}
// Cleanup
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
* Simple test to confirm verification does not return any known error codes
* with valid SAME_PADDING values
*/
void verify_same_pass() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 3, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(
input_shape, ZDNN_NHWC, FP32, output_shape, ZDNN_NHWC, FP32, SAME_PADDING,
kernel_height, kernel_width, stride_height, stride_width, false, ZDNN_OK);
}
/*
* Simple test to confirm verification passes with valid VALID_PADDING values
*/
void verify_valid_pass() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 2, 2, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_OK);
}
/*
* Verifying the input tensor with output. Should fail
* because the input and output tensors have different dtypes
*/
void verify_dtype_mismatch_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 3, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
// Setting output dtype to FP16 instead of FP32 should cause failure
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP16, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
true, ZDNN_INVALID_TYPE);
}
/*
* Verifying the input tensor with output. Should fail
* because the input and output tensor have different formats.
*/
void verify_format_mismatch_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 2, 2, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
// Setting input format to ZDNN_HWCK instead of NHWC should cause failure
run_verify_pool_avg_max_tensors(input_shape, ZDNN_HWCK, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_FORMAT);
}
/*
* Verifying the input tensor with output. Should fail
 * because the innermost dimensions of the input and output are different
*/
void verify_bad_c_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[3] to 4 instead of 1 should cause failure
uint32_t output_shape[] = {1, 3, 3, 4};
uint32_t kernel_height = 4;
uint32_t kernel_width = 4;
uint32_t stride_height = 3;
uint32_t stride_width = 3;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the input tensor with output. Should fail
 * because the outermost dimensions of the input and output are different
*/
void verify_bad_n_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[0] to 4 instead of 1 should cause failure
uint32_t output_shape[] = {4, 3, 3, 1};
uint32_t kernel_height = 4;
uint32_t kernel_width = 4;
uint32_t stride_height = 3;
uint32_t stride_width = 3;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Simple test to confirm verification does not return any known error codes
* with valid SAME_PADDING values when strides are 0
*/
void verify_0_strides_pass() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 1, 1, 1};
uint32_t kernel_height = 8;
uint32_t kernel_width = 5;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_OK);
}
/*
* Verifying the 0 stride values. Should fail
 * because the padding_type must be VALID_PADDING when strides are 0
*/
void verify_0_strides_same_padding_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 1, 1, 1};
uint32_t kernel_height = 8;
uint32_t kernel_width = 5;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_STRIDE_PADDING);
}
/*
* Verifying the 0 stride values. Should fail
* because the second dimension stride value is greater than 0,
* and the third dimension stride value is 0.
*/
void verify_0_strides_stride_width_not_zero_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 3, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 0;
// Setting stride_width to 1 instead of 0 should cause failure
uint32_t stride_width = 1;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_STRIDES);
}
/*
* Verifying the stride values. Should fail because the third dimension stride
* value is greater than 0, and the second dimension stride value is 0.
*/
void verify_0_strides_stride_height_not_zero_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 3, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
// Setting stride_height to 1 instead of 0 should cause failure
uint32_t stride_height = 1;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_STRIDES);
}
/*
* Verifying the input tensor with output. Should fail
* because stride values are both 0 and input dimension 2 is not equal to
* window dim 2
*/
void verify_0_strides_bad_kernel_width_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 1, 1, 1};
uint32_t kernel_height = 8;
// Setting kernel_width to 4 instead of 5 should cause failure
uint32_t kernel_width = 4;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the input tensor with output. Should fail
* because stride values are both 0 and input dimension 3 is not equal
* to window_size dimension 3
*/
void verify_0_strides_bad_kernel_height_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 1, 1, 1};
// Setting kernel_height to 7 instead of 8 should cause failure
uint32_t kernel_height = 7;
uint32_t kernel_width = 5;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor. Should fail because stride values are both 0 and
* output dimensions 2 and 3 are not equal to 1
*/
void verify_0_strides_bad_out_width_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[2] to 2 instead of 1 should cause failure
uint32_t output_shape[] = {1, 1, 2, 1};
uint32_t kernel_height = 8;
uint32_t kernel_width = 5;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor. Should fail because stride values are both 0 and
* output dimensions 2 and 3 are not equal to 1
*/
void verify_0_strides_bad_out_height_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[1] to 2 instead of 1 should cause failure
uint32_t output_shape[] = {1, 2, 1, 1};
uint32_t kernel_height = 8;
uint32_t kernel_width = 5;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the input and window values. Should fail
* because the second dimension window value is greater than the
* second dimension of the input tensor and the padding is VALID.
*/
void verify_valid_bad_kernel_width_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 2, 2, 1};
uint32_t kernel_height = 3;
// Setting kernel_width to 6 instead of 2 should cause failure
uint32_t kernel_width = 6;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the input and window values. Should fail
* because the third dimension window value is greater than the
* third dimension of the input tensor and the padding is VALID.
*/
void verify_valid_bad_kernel_height_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
uint32_t output_shape[] = {1, 2, 2, 1};
  // Setting kernel_height to 9 instead of 3 should cause failure
uint32_t kernel_height = 9;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor has the correct shape given the padding.
* This test should fail because the dimension 3 of the output tensor is not
* equal to the expected value and the padding is VALID_PADDING
*/
void verify_valid_bad_out_width_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[2] to 3 instead of 2 should cause expected failure
uint32_t output_shape[] = {1, 2, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor has the correct shape given the padding. This
* test should fail because the dimension 2 of the output tensor is not equal to
* the expected value and the padding is VALID_PADDING
*/
void verify_valid_bad_out_height_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[1] to 3 instead of 2 should cause expected failure
uint32_t output_shape[] = {1, 3, 2, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, VALID_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor has the correct shape given the padding. This
* test should fail because the dimension 3 of the output tensor is not equal to
* the expected value and the padding is SAME_PADDING
*/
void verify_same_bad_out_width_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[2] to 4 instead of 3 should cause expected failure
uint32_t output_shape[] = {1, 3, 4, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
/*
* Verifying the output tensor has the correct shape given the padding. This
* test should fail because the dimension 2 of the output tensor is not equal to
* the expected value and the padding is SAME_PADDING
*/
void verify_same_bad_out_height_fail() {
uint32_t input_shape[] = {1, 8, 5, 1};
// Setting shape[1] to 4 instead of 3 should cause expected failure
uint32_t output_shape[] = {1, 4, 3, 1};
uint32_t kernel_height = 3;
uint32_t kernel_width = 2;
uint32_t stride_height = 3;
uint32_t stride_width = 2;
run_verify_pool_avg_max_tensors(input_shape, ZDNN_NHWC, FP32, output_shape,
ZDNN_NHWC, FP32, SAME_PADDING, kernel_height,
kernel_width, stride_height, stride_width,
false, ZDNN_INVALID_SHAPE);
}
int main() {
UNITY_BEGIN();
RUN_TEST(verify_same_pass);
RUN_TEST(verify_valid_pass);
RUN_TEST(verify_format_mismatch_fail);
RUN_TEST(verify_dtype_mismatch_fail);
RUN_TEST(verify_bad_c_fail);
RUN_TEST(verify_bad_n_fail);
RUN_TEST(verify_0_strides_pass);
RUN_TEST(verify_0_strides_same_padding_fail);
RUN_TEST(verify_0_strides_stride_width_not_zero_fail);
RUN_TEST(verify_0_strides_stride_height_not_zero_fail);
RUN_TEST(verify_0_strides_bad_kernel_width_fail);
RUN_TEST(verify_0_strides_bad_kernel_height_fail);
RUN_TEST(verify_0_strides_bad_out_width_fail);
RUN_TEST(verify_0_strides_bad_out_height_fail);
RUN_TEST(verify_valid_bad_kernel_width_fail);
RUN_TEST(verify_valid_bad_kernel_height_fail);
RUN_TEST(verify_valid_bad_out_width_fail);
RUN_TEST(verify_valid_bad_out_height_fail);
RUN_TEST(verify_same_bad_out_width_fail);
RUN_TEST(verify_same_bad_out_height_fail);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify_transform.c 0000664 0000000 0000000 00000016440 15000221702 0023030 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {}
void tearDown(void) {}
/// Common test routine for transform tensors
///
/// \param[in] input_shape Pointer to input dim array
/// \param[in] input_format Input format
/// \param[in] input_type Input type
/// \param[in] output_shape Pointer to output dim array
/// \param[in] output_format Output format
/// \param[in] output_type Output type
/// \param[in] toc transformation-operation code
/// \param[in] min_clipping minimum clipping
/// \param[in] max_clipping maximum clipping
/// \param[in] exp_status Expected status
/// \param[in] error_msg Error message to prepend to the standard error
/// message
///
void test_transform(uint32_t input_shape[], zdnn_data_formats input_format,
zdnn_data_types input_type, uint32_t output_shape[],
zdnn_data_formats output_format,
zdnn_data_types output_type, uint32_t toc,
int8_t min_clipping, int8_t max_clipping,
zdnn_status exp_status, char *error_msg) {
zdnn_status status = ZDNN_OK;
zdnn_ztensor input, output;
zdnn_tensor_desc tfrmd_desc_input, tfrmd_desc_output;
input.transformed_desc = &tfrmd_desc_input;
output.transformed_desc = &tfrmd_desc_output;
init_transformed_desc(ZDNN_NHWC, input_type, input_format,
input.transformed_desc, input_shape[0], input_shape[1],
input_shape[2], input_shape[3]);
init_transformed_desc(ZDNN_NHWC, output_type, output_format,
output.transformed_desc, output_shape[0],
output_shape[1], output_shape[2], output_shape[3]);
func_sp_parm1_transform transform_parm1;
memset(&transform_parm1, 0, sizeof(func_sp_parm1_transform));
transform_parm1.toc = toc;
func_sp_parm4_transform transform_parm4;
memset(&transform_parm4, 0, sizeof(func_sp_parm4_transform));
transform_parm4.clip_min = min_clipping;
func_sp_parm5_transform transform_parm5;
memset(&transform_parm5, 0, sizeof(func_sp_parm5_transform));
transform_parm5.clip_max = max_clipping;
status = verify_transform_tensors(&input, &output, &transform_parm1,
&transform_parm4, &transform_parm5);
TEST_ASSERT_MESSAGE_FORMATTED(
exp_status == status, "%s Expected status = %08x, actual status = %08x",
error_msg, exp_status, status);
}
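// Note (illustrative): the INT8 cases below exercise the clipping check; the
// tests expect equal clip_min/clip_max values (3, 3) or inverted values (4, 3)
// to be rejected with ZDNN_INVALID_CLIPPING_VALUE, while (2, 3) passes.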
void transform_verify_pass_fp32_dlfloat() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16,
NNPA_TOC_STICK_DLFLOAT, 0, 0, ZDNN_OK,
"DLFloat transform tensors are different.");
}
void transform_verify_pass_fp32_int8() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_BINARY_INT8,
NNPA_TOC_STICK_DLFLOAT, 2, 3, ZDNN_OK,
"DLFloat transform tensors are different.");
}
void transform_verify_pass_dlfloat_fp32() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16,
output_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
NNPA_TOC_STICK_DLFLOAT, 0, 0, ZDNN_OK,
"DLFloat transform tensors are different.");
}
void transform_verify_fail_shape_dim1() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 2};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16,
NNPA_TOC_STICK_DLFLOAT, 0, 0, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void transform_verify_fail_shape_dim2() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 4, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_DLFLOAT16,
NNPA_TOC_STICK_DLFLOAT, 0, 0, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void transform_verify_fail_shape_dim3() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 2, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_BINARY_INT8,
NNPA_TOC_STICK_INT8, 2, 3, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void transform_verify_fail_shape_dim4() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {2, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_BINARY_INT8,
NNPA_TOC_STICK_INT8, 2, 3, ZDNN_INVALID_SHAPE,
"Failed to fail on different shapes.");
}
void transform_verify_fail_clips_equal() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_BINARY_INT8,
NNPA_TOC_STICK_INT8, 3, 3, ZDNN_INVALID_CLIPPING_VALUE,
"Failed to fail on invalid clipping value.");
}
void transform_verify_fail_invalid_clip() {
uint32_t input_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
uint32_t output_shape[ZDNN_MAX_DIMS] = {1, 1, 2, 4};
test_transform(input_shape, ZDNN_FORMAT_4DGENERIC, ZDNN_BINARY_FP32,
output_shape, ZDNN_FORMAT_4DFEATURE, ZDNN_BINARY_INT8,
NNPA_TOC_STICK_INT8, 4, 3, ZDNN_INVALID_CLIPPING_VALUE,
"Failed to fail on invalid clipping value.");
}
int main() {
UNITY_BEGIN();
RUN_TEST(transform_verify_pass_fp32_dlfloat);
RUN_TEST(transform_verify_pass_fp32_int8);
RUN_TEST(transform_verify_pass_dlfloat_fp32);
RUN_TEST(transform_verify_fail_shape_dim1);
RUN_TEST(transform_verify_fail_shape_dim2);
RUN_TEST(transform_verify_fail_shape_dim3);
RUN_TEST(transform_verify_fail_shape_dim4);
RUN_TEST(transform_verify_fail_clips_equal);
RUN_TEST(transform_verify_fail_invalid_clip);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensor_verify_zdnn_lstm_gru.c 0000664 0000000 0000000 00000043012 15000221702 0023675 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_rnn.h"
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
#define BAD_FORMAT 255
#define BAD_TYPE 255
#define DEFAULT_NUM_TIMESTEPS 3
#define DEFAULT_NUM_BATCHES 4
#define DEFAULT_NUM_FEATURES 7
#define DEFAULT_NUM_HIDDEN 16
const uint32_t num_timesteps = DEFAULT_NUM_TIMESTEPS;
const uint32_t num_batches = DEFAULT_NUM_BATCHES;
const uint32_t num_features = DEFAULT_NUM_FEATURES;
const uint32_t num_hidden = DEFAULT_NUM_HIDDEN;
#define MAX_DESC_LEN 256
char msg[MAX_DESC_LEN];
typedef enum tensor_idx {
INPUT,
H0,
C0,
WEIGHTS,
BIASES,
HIDDEN_WEIGHTS,
HIDDEN_BIASES,
HN_OUTPUT,
CF_OUTPUT,
MAX_TENSOR_IDX,
NONE = MAX_TENSOR_IDX
} tensor_idx;
// roll our own instead of using get_func_code_num_gates() in case that one
// breaks
#define NUM_GATES(f) ((f == NNPA_LSTMACT) ? 4 : 3)
void create_ztensors(uint8_t function_code, uint32_t num_timesteps,
uint32_t num_batches, uint32_t num_features,
uint32_t num_hidden, uint32_t num_dirs,
bool all_timesteps_out, zdnn_ztensor **rnn_ztens) {
zdnn_data_layouts layout = ZDNN_NHWC;
zdnn_data_types dtype = FP32;
uint8_t num_gates = NUM_GATES(function_code);
// baseline dimensions with correct requirements: fwd all-timesteps output
uint32_t *shape[MAX_TENSOR_IDX];
// create ztensors using transformed shape + ZDNN_NHWC to make the code
  // simpler, so that we can loop through them all rather than dealing with
// different pre-transformed layouts etc.
//
// if the dims transformation logic changes then these shapes need to be
// changed too.
shape[INPUT] = (uint32_t[]){num_timesteps, 1, num_batches, num_features};
shape[H0] = (uint32_t[]){num_dirs, 1, num_batches, num_hidden};
shape[C0] = (uint32_t[]){num_dirs, 1, num_batches, num_hidden};
shape[WEIGHTS] =
(uint32_t[]){num_dirs, 1, num_features, num_gates * PADDED(num_hidden)};
shape[BIASES] = (uint32_t[]){num_dirs, 1, 1, num_gates * PADDED(num_hidden)};
shape[HIDDEN_WEIGHTS] =
(uint32_t[]){num_dirs, 1, num_hidden, num_gates * PADDED(num_hidden)};
shape[HIDDEN_BIASES] =
(uint32_t[]){num_dirs, 1, 1, num_gates * PADDED(num_hidden)};
shape[HN_OUTPUT] =
(uint32_t[]){all_timesteps_out ? num_timesteps : 1, 1, num_batches,
(num_dirs < 2) ? num_hidden : num_dirs * PADDED(num_hidden)};
shape[CF_OUTPUT] =
(uint32_t[]){1, 1, num_batches,
(num_dirs < 2) ? num_hidden : num_dirs * PADDED(num_hidden)};
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
rnn_ztens[i] = alloc_ztensor_with_values(shape[i], layout, dtype, NO_CONCAT,
true, ZERO_ARRAY);
}
if (function_code == NNPA_GRUACT) {
// set these to NULL so the test will blow up if used inappropriately
shape[C0] = NULL;
shape[CF_OUTPUT] = NULL;
}
}
void set_dim(zdnn_tensor_desc *desc, uint8_t dim_idx, uint32_t value) {
switch (dim_idx) {
case (1):
desc->dim1 = value;
break;
case (2):
desc->dim2 = value;
break;
case (3):
desc->dim3 = value;
break;
case (4):
desc->dim4 = value;
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("%d is not a valid dim_idx to set.", dim_idx);
break;
}
}
// Verify return status by sabotaging a ztensor
void verify(uint8_t function_code, lstm_gru_direction direction,
bool all_timesteps_out, tensor_idx idx, bool sabotage_dim,
uint8_t dim_idx, uint32_t dim_val, bool sabotage_type,
zdnn_data_types type, bool sabotage_format,
zdnn_data_formats format, zdnn_status exp_status,
char *description) {
// Create the test tensors set
zdnn_ztensor *rnn_ztens[MAX_TENSOR_IDX];
create_ztensors(function_code, num_timesteps, num_batches, num_features,
num_hidden, (direction == BIDIR) ? 2 : 1, all_timesteps_out,
rnn_ztens);
// Sabotage the dim/format/type of the ztensor specified in idx
if (idx != NONE) {
if (sabotage_dim) {
set_dim(rnn_ztens[idx]->transformed_desc, dim_idx, dim_val);
}
if (sabotage_type) {
rnn_ztens[idx]->transformed_desc->type = type;
}
if (sabotage_format) {
rnn_ztens[idx]->transformed_desc->format = format;
}
}
zdnn_status actual_status = verify_zdnn_lstm_or_gru_tensors(
function_code, rnn_ztens[INPUT], rnn_ztens[H0], rnn_ztens[C0],
rnn_ztens[WEIGHTS], rnn_ztens[BIASES], rnn_ztens[HIDDEN_WEIGHTS],
rnn_ztens[HIDDEN_BIASES], (direction == BIDIR) ? 2 : 1,
rnn_ztens[HN_OUTPUT], rnn_ztens[CF_OUTPUT]);
if (actual_status != exp_status) {
TEST_FAIL_MESSAGE_FORMATTED(
"%s: Actual status return (%08x) does not match expected (%08x).",
description, actual_status, exp_status);
}
// Cleanup
for (int i = 0; i < MAX_TENSOR_IDX; i++) {
free_ztensor_buffers(1, rnn_ztens[i]);
}
}
// Verify return status by sabotaging the ztensor dimension
void verify_shape(uint8_t function_code, lstm_gru_direction direction,
bool all_timesteps_out, tensor_idx idx, uint8_t dim_idx,
uint32_t dim_val, zdnn_status exp_status, char *description) {
verify(function_code, direction, all_timesteps_out, idx, true, dim_idx,
dim_val, false, 0, false, 0, exp_status, description);
}
// Verify return status by sabotaging the ztensor data type
void verify_type(uint8_t function_code, lstm_gru_direction direction,
bool all_timesteps_out, tensor_idx idx, zdnn_data_types type,
zdnn_status exp_status, char *description) {
verify(function_code, direction, all_timesteps_out, idx, false, 0, 0, true,
type, false, 0, exp_status, description);
}
// Verify return status by sabotaging the ztensor format
void verify_format(uint8_t function_code, lstm_gru_direction direction,
bool all_timesteps_out, tensor_idx idx,
zdnn_data_formats format, zdnn_status exp_status,
char *description) {
verify(function_code, direction, all_timesteps_out, idx, false, 0, 0, false,
0, true, format, exp_status, description);
}
// this macro assumes lstm_gru_direction is a 0, 1, 2... enum
#define LOOP_ALL_LSTM_GRU_DIRECTIONS(lgd) for (int lgd = 0; lgd < 3; lgd++)
// this macro assumes false = 0, true = 1
#define LOOP_TRUE_AND_FALSE(tf) for (int tf = 0; tf < 2; tf++)
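// Example (illustrative): LOOP_ALL_LSTM_GRU_DIRECTIONS(d) expands to
// for (int d = 0; d < 3; d++) and LOOP_TRUE_AND_FALSE(tf) expands to
// for (int tf = 0; tf < 2; tf++), so each test body below runs once per
// direction / all_timesteps_out combination.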
/*
* Test verification of valid activation tensors.
* All tensors will be built with acceptable properties.
*/
void verify_pass() {
// Expect no known error, no bad dims will be set
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act),
get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_shape(act, direction, all_timesteps_out, NONE, 0, 0, ZDNN_OK,
msg);
}
}
}
}
/*
 * Verify the situation where num_timesteps is 0
*/
void verify_timestep_zero_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act), get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_shape(act, direction, all_timesteps_out, INPUT, 3, 0,
ZDNN_INVALID_SHAPE, msg);
}
}
}
}
/*
* Verify num_timesteps mismatch situations
*/
void verify_timestep_mismatch_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act), get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_shape(act, direction, all_timesteps_out, H0, 3,
num_timesteps + 1, ZDNN_INVALID_SHAPE, msg);
}
}
}
}
/*
* Verify num_batches mismatch situations
*/
void verify_batches_mismatch_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
// input, h0, c0 and all outputs require the same dim2 (num_batches)
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__, \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 2, \
num_batches + 1, ZDNN_INVALID_SHAPE, msg);
TEST(INPUT);
TEST(H0);
if (act == NNPA_LSTMACT) {
TEST(C0);
}
TEST(HN_OUTPUT);
if (act == NNPA_LSTMACT) {
TEST(CF_OUTPUT);
}
#undef TEST
}
}
}
}
/*
* Verify num_features mismatch situations
*/
void verify_features_mismatch_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act), get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_shape(act, direction, all_timesteps_out, WEIGHTS, 2,
num_features + 1, ZDNN_INVALID_SHAPE, msg);
}
}
}
}
/*
* Verify num_hidden mismatch situations
*/
void verify_hidden_mismatch_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
// h0, c0 and all outputs require the same dim1 (num_hidden)
#define TEST(tensor_idx) \
  snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__,   \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 1, \
num_hidden + 1, ZDNN_INVALID_SHAPE, msg);
TEST(H0);
if (act == NNPA_LSTMACT) {
TEST(C0);
}
TEST(HN_OUTPUT);
if (act == NNPA_LSTMACT) {
TEST(CF_OUTPUT);
}
#undef TEST
// hidden_weights dim2 is num_hidden
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s",
__func__, get_function_code_str(act), "HIDDEN_WEIGHTS",
get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_shape(act, direction, all_timesteps_out, HIDDEN_WEIGHTS, 2,
num_hidden + 1, ZDNN_INVALID_SHAPE, msg);
// (hidden_) weights and biases should have in_pad value in dim1
uint32_t in_pad = NUM_GATES(act) * PADDED(num_hidden);
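        // e.g. (illustrative, assuming PADDED() rounds dim1 up to a 64-cell
        // stick): LSTM with num_hidden = 16 gives in_pad = 4 * 64 = 256.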
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__, \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 1, in_pad + 1, \
ZDNN_INVALID_SHAPE, msg);
TEST(WEIGHTS);
TEST(BIASES);
TEST(HIDDEN_WEIGHTS);
TEST(HIDDEN_BIASES);
#undef TEST
// the outputs should have out_pad value in dim1
uint32_t out_pad =
(direction != BIDIR) ? num_hidden : 2 * PADDED(num_hidden);
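        // e.g. (illustrative, same 64-cell assumption): num_hidden = 16 gives
        // out_pad = 16 for non-BIDIR directions and 2 * 64 = 128 for BIDIR.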
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__, \
#tensor_idx, get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 1, out_pad + 1, \
ZDNN_INVALID_SHAPE, msg);
TEST(HN_OUTPUT);
if (act == NNPA_LSTMACT) {
TEST(CF_OUTPUT);
}
#undef TEST
}
}
}
}
/*
* Verify num_dirs mismatch situations
*/
void verify_dirs_mismatch_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
// h0, c0 and all outputs require the same dim4 (num_dirs)
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__, \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 4, \
((direction != BIDIR) ? 1 : 2) + 1, ZDNN_INVALID_SHAPE, msg);
TEST(H0);
if (act == NNPA_LSTMACT) {
TEST(C0);
}
TEST(WEIGHTS);
TEST(BIASES);
TEST(HIDDEN_WEIGHTS);
TEST(HIDDEN_BIASES);
#undef TEST
}
}
}
}
/*
* Verify other dims not covered in other tests
*/
void verify_other_dims_fail() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
// dim3 of all tensors should be 1
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__, \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 3, 2, \
ZDNN_INVALID_SHAPE, msg);
TEST(INPUT);
TEST(H0);
if (act == NNPA_LSTMACT) {
TEST(C0);
}
TEST(WEIGHTS);
TEST(BIASES);
TEST(HIDDEN_WEIGHTS);
TEST(HIDDEN_BIASES);
TEST(HN_OUTPUT);
if (act == NNPA_LSTMACT) {
TEST(CF_OUTPUT);
}
#undef TEST
// dim2 of (hidden_)biases should be 1
#define TEST(tensor_idx) \
snprintf(msg, MAX_DESC_LEN, "%s %s %s %s all_timesteps_out: %s", __func__, \
get_function_code_str(act), #tensor_idx, \
get_rnn_direction_str(direction), \
all_timesteps_out ? "true" : "false"); \
verify_shape(act, direction, all_timesteps_out, tensor_idx, 2, 2, \
ZDNN_INVALID_SHAPE, msg);
TEST(BIASES);
TEST(HIDDEN_BIASES);
#undef TEST
}
}
}
}
/*
* Test verification of failed format
*/
void verify_fail_format() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act), get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_format(act, direction, all_timesteps_out, HN_OUTPUT, BAD_FORMAT,
ZDNN_INVALID_FORMAT, msg);
}
}
}
}
/*
* Test verification of failed type
*/
void verify_fail_type() {
LOOP_LSTM_AND_GRU(act) {
LOOP_ALL_LSTM_GRU_DIRECTIONS(direction) {
LOOP_TRUE_AND_FALSE(all_timesteps_out) {
snprintf(msg, MAX_DESC_LEN, "%s %s %s all_timesteps_out: %s", __func__,
get_function_code_str(act), get_rnn_direction_str(direction),
all_timesteps_out ? "true" : "false");
verify_type(act, direction, all_timesteps_out, HN_OUTPUT, BAD_TYPE,
ZDNN_INVALID_TYPE, msg);
}
}
}
}
int main() {
UNITY_BEGIN();
RUN_TEST(verify_pass);
RUN_TEST(verify_timestep_zero_fail);
RUN_TEST(verify_timestep_mismatch_fail);
RUN_TEST(verify_batches_mismatch_fail);
RUN_TEST(verify_features_mismatch_fail);
RUN_TEST(verify_hidden_mismatch_fail);
RUN_TEST(verify_dirs_mismatch_fail);
RUN_TEST(verify_other_dims_fail);
RUN_TEST(verify_fail_format);
RUN_TEST(verify_fail_type);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_tensordump.c 0000664 0000000 0000000 00000025436 15000221702 0020244 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "testsupport.h"
bool suppress_output;
void setUp(void) {
VERIFY_HW_ENV;
if (log_level == LOGLEVEL_DEBUG) {
suppress_output = false;
} else {
suppress_output = true;
}
}
void tearDown(void) {}
// Create pre and post descriptions
void init_tensor_descriptors(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, zdnn_data_layouts layout,
zdnn_data_types data_type,
zdnn_tensor_desc *pre_tfrmd_desc,
zdnn_tensor_desc *tfrmd_desc) {
switch (layout) {
case ZDNN_1D:
zdnn_init_pre_transformed_desc(layout, data_type, pre_tfrmd_desc, dim1);
break;
case ZDNN_2D:
case ZDNN_2DS:
zdnn_init_pre_transformed_desc(layout, data_type, pre_tfrmd_desc, dim2,
dim1);
break;
case ZDNN_3D:
case ZDNN_3DS:
zdnn_init_pre_transformed_desc(layout, data_type, pre_tfrmd_desc, dim3,
dim2, dim1);
break;
default:
zdnn_init_pre_transformed_desc(layout, data_type, pre_tfrmd_desc, dim4,
dim3, dim2, dim1);
}
zdnn_status status =
zdnn_generate_transformed_desc(pre_tfrmd_desc, tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc failed (status = %08x)", status);
}
void test_origtensor_dump(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, zdnn_data_layouts layout,
zdnn_data_types data_type, dump_mode mode) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
init_tensor_descriptors(dim4, dim3, dim2, dim1, layout, data_type,
&pre_tfrmd_desc, &tfrmd_desc);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
void *data = create_and_fill_random_fp_data(&ztensor);
printf("\n--- Pre-Transformed Tensor Dump (%s) ---\n",
get_data_type_str(data_type));
dumpdata_origtensor(&pre_tfrmd_desc, data, mode);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_tensor_data_dump(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, zdnn_data_layouts layout,
zdnn_data_types data_type, dump_mode mode) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
init_tensor_descriptors(dim4, dim3, dim2, dim1, layout, data_type,
&pre_tfrmd_desc, &tfrmd_desc);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
void *data = create_and_fill_random_fp_data(&ztensor);
// Transform the tensor
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor() failed, status = %08x (%s)",
status, zdnn_get_status_message(status));
// Print transformed tensor dump
printf("\n--- Transformed (Stickified) Tensor Dump (%s) ---\n",
get_data_type_str(data_type));
dumpdata_ztensor(&ztensor, mode, false);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_tensor_dump_int8(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, dump_mode mode) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
int8_t *data;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, INT8, &pre_tfrmd_desc, dim4, dim3,
dim2, dim1);
status = zdnn_generate_quantized_transformed_desc(
&pre_tfrmd_desc, QUANTIZED_INT8, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_quantized_transformed_desc() failed (status = %08x)",
status);
status = zdnn_init_quantized_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc,
0, 0, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_quantized_ztensor_with_malloc() failed (status = %08x)",
status);
data = create_and_fill_random_int8_data(&ztensor);
status =
zdnn_transform_quantized_ztensor(&ztensor, false, 0, 0, (void *)data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_quantized_ztensor() failed, status = %08x "
"(%s)",
status, zdnn_get_status_message(status));
printf("\n--- Transformed (Stickified) Tensor Dump (%s) ---\n",
get_data_type_str(INT8));
dumpdata_ztensor(&ztensor, mode, false);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_tensor_dump_no_page_break() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
init_tensor_descriptors(1, 1, 1, 100, ZDNN_NHWC, FP32, &pre_tfrmd_desc,
&tfrmd_desc);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
void *data = create_and_fill_random_fp_data(&ztensor);
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor() failed, status = %08x (%s)",
status, zdnn_get_status_message(status));
dumpdata_ztensor(&ztensor, AS_HEX, false);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
void test_tensor_dump_with_page_break() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
init_tensor_descriptors(1, 1, 1, 2150, ZDNN_NHWC, FP32, &pre_tfrmd_desc,
&tfrmd_desc);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
void *data = create_and_fill_random_fp_data(&ztensor);
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor() failed, status = %08x (%s)",
status, zdnn_get_status_message(status));
dumpdata_ztensor(&ztensor, AS_HEX, false);
free(data);
zdnn_free_ztensor_buffer(&ztensor);
}
// Only print output if log_level is set to debug
void run_test_suppress_output(void (*test_func)(void)) {
  // save the original stdout
FILE *original_stdout = stdout;
  // create a null sink for the output that is about to be printed
FILE *null_out = fopen("/dev/null", "w");
if (!suppress_output) {
test_func();
fclose(null_out);
return;
}
  // if output is suppressed, redirect stdout to the null sink
stdout = null_out;
test_func();
// restore original stdout
stdout = original_stdout;
fclose(null_out);
}
void test_simple_1D_hex_bfloat_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_1D, BFLOAT, AS_HEX);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_1D, BFLOAT, AS_HEX);
}
void test_simple_1D_float_bfloat_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_1D, BFLOAT, AS_FLOAT);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_1D, BFLOAT, AS_FLOAT);
}
void test_simple_hex_fp16_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_NHWC, FP16, AS_HEX);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_NHWC, FP16, AS_HEX);
}
void test_simple_float_fp16_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_NHWC, FP16, AS_FLOAT);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_NHWC, FP16, AS_FLOAT);
}
void test_simple_hex_fp32_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_NHWC, FP32, AS_HEX);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_NHWC, FP32, AS_HEX);
}
void test_simple_float_fp32_dump() {
test_origtensor_dump(1, 1, 1, 1, ZDNN_NHWC, FP32, AS_FLOAT);
test_tensor_data_dump(1, 1, 1, 1, ZDNN_NHWC, FP32, AS_FLOAT);
}
void test_simple_hex_int8_dump() { test_tensor_dump_int8(1, 1, 1, 1, AS_HEX); }
void test_simple_float_int8_dump() {
test_tensor_dump_int8(1, 1, 1, 1, AS_FLOAT);
}
// Wrapper functions for Unity to run
void test_simple_1D_hex_bfloat_dump_with_suppression() {
run_test_suppress_output(test_simple_1D_hex_bfloat_dump);
}
void test_simple_1D_float_bfloat_dump_with_suppression() {
run_test_suppress_output(test_simple_1D_float_bfloat_dump);
}
void test_simple_hex_fp16_dump_with_suppression() {
run_test_suppress_output(test_simple_hex_fp16_dump);
}
void test_simple_float_fp16_dump_with_suppression() {
run_test_suppress_output(test_simple_float_fp16_dump);
}
void test_simple_hex_fp32_dump_with_suppression() {
run_test_suppress_output(test_simple_hex_fp32_dump);
}
void test_simple_float_fp32_dump_with_suppression() {
run_test_suppress_output(test_simple_float_fp32_dump);
}
void test_simple_hex_int8_dump_with_suppression() {
run_test_suppress_output(test_simple_hex_int8_dump);
}
void test_simple_float_int8_dump_with_suppression() {
run_test_suppress_output(test_simple_float_int8_dump);
}
void test_tensor_dump_no_page_break_with_suppression() {
run_test_suppress_output(test_tensor_dump_no_page_break);
}
void test_tensor_dump_with_page_break_with_suppression() {
run_test_suppress_output(test_tensor_dump_with_page_break);
}
int main(void) {
UNITY_BEGIN();
  // If log_level is set to debug, output will be printed.
// Otherwise, nothing will be printed but tests will still run
// BFLOAT
RUN_TEST(test_simple_1D_hex_bfloat_dump_with_suppression);
RUN_TEST(test_simple_1D_float_bfloat_dump_with_suppression);
// FP16
RUN_TEST(test_simple_hex_fp16_dump_with_suppression);
RUN_TEST(test_simple_float_fp16_dump_with_suppression);
// FP32
RUN_TEST(test_simple_hex_fp32_dump_with_suppression);
RUN_TEST(test_simple_float_fp32_dump_with_suppression);
// INT8 Quantized
RUN_TEST(test_simple_hex_int8_dump_with_suppression);
RUN_TEST(test_simple_float_int8_dump_with_suppression);
// Page Break
RUN_TEST(test_tensor_dump_no_page_break_with_suppression);
RUN_TEST(test_tensor_dump_with_page_break_with_suppression);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_transform_dims.c 0000664 0000000 0000000 00000026741 15000221702 0021073 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {}
void tearDown(void) {}
/*
Common routine for testing dimension translation
  Transformed dimensions must match the expected values
*/
void test_tfrmd_dims(zdnn_data_layouts pre_tfrmd_layout,
uint32_t pre_tfrmd_dim4, uint32_t pre_tfrmd_dim3,
uint32_t pre_tfrmd_dim2, uint32_t pre_tfrmd_dim1,
uint32_t tfrmd_dim4, uint32_t tfrmd_dim3,
uint32_t tfrmd_dim2, uint32_t tfrmd_dim1) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_status status;
switch (pre_tfrmd_layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, pre_tfrmd_dim1);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, pre_tfrmd_dim2,
pre_tfrmd_dim1);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, pre_tfrmd_dim3,
pre_tfrmd_dim2, pre_tfrmd_dim1);
break;
default:
zdnn_init_pre_transformed_desc(
pre_tfrmd_layout, test_datatype, &pre_tfrmd_desc, pre_tfrmd_dim4,
pre_tfrmd_dim3, pre_tfrmd_dim2, pre_tfrmd_dim1);
}
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim4 == tfrmd_dim4,
"tfrmd_desc.dim4 (%u) doesn't match expected (%u)", tfrmd_desc.dim4,
tfrmd_dim4);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim3 == tfrmd_dim3,
"tfrmd_desc.dim3 (%u) doesn't match expected (%u)", tfrmd_desc.dim3,
tfrmd_dim3);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim2 == tfrmd_dim2,
"tfrmd_desc.dim2 (%u) doesn't match expected (%u)", tfrmd_desc.dim2,
      tfrmd_dim2);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim1 == tfrmd_dim1,
"tfrmd_desc.dim1 (%u) doesn't match expected (%u)", tfrmd_desc.dim1,
      tfrmd_dim1);
}
/*
Common routine for testing dimension translation (concatenated types)
  Transformed dimensions must match the expected values
pre_tfrmd_dim3 is ignored when pre_tfrmd_layout is ZDNN_2DS
*/
void test_tfrmd_concat_dims(zdnn_data_layouts pre_tfrmd_layout,
uint32_t pre_tfrmd_dim3, uint32_t pre_tfrmd_dim2,
uint32_t pre_tfrmd_dim1, zdnn_concat_info info) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_status status;
uint32_t expected_dim4 = 0, expected_dim3 = 0, expected_dim2 = 0,
expected_dim1 = 0;
uint8_t num_concats = 0;
if (CONCAT_RNN_TYPE(info) == RNN_TYPE_LSTM) {
num_concats = 4;
} else if (CONCAT_RNN_TYPE(info) == RNN_TYPE_GRU) {
num_concats = 3;
} else {
TEST_FAIL_MESSAGE_FORMATTED("bad concat info: %08x\n", info);
}
switch (pre_tfrmd_layout) {
case (ZDNN_2DS):
expected_dim4 = pre_tfrmd_dim2;
expected_dim3 = 1;
expected_dim2 = 1;
expected_dim1 = CEIL(pre_tfrmd_dim1, AIU_2BYTE_CELLS_PER_STICK) *
AIU_2BYTE_CELLS_PER_STICK * num_concats;
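    // e.g. (illustrative, assuming AIU_2BYTE_CELLS_PER_STICK is 64): an LSTM
    // bias with pre_tfrmd_dim1 = 16 expects CEIL(16, 64) * 64 * 4 = 256.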
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, pre_tfrmd_dim2,
pre_tfrmd_dim1);
break;
case (ZDNN_3DS):
expected_dim4 = pre_tfrmd_dim3;
expected_dim3 = 1;
if ((CONCAT_USAGE(info) == USAGE_WEIGHTS) &&
(CONCAT_PREV_LAYER(info) == PREV_LAYER_BIDIR)) {
expected_dim2 = CEIL(pre_tfrmd_dim2 / 2, AIU_2BYTE_CELLS_PER_STICK) *
AIU_2BYTE_CELLS_PER_STICK * 2;
} else {
expected_dim2 = pre_tfrmd_dim2;
}
expected_dim1 = CEIL(pre_tfrmd_dim1, AIU_2BYTE_CELLS_PER_STICK) *
AIU_2BYTE_CELLS_PER_STICK * num_concats;
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, test_datatype,
&pre_tfrmd_desc, pre_tfrmd_dim3,
pre_tfrmd_dim2, pre_tfrmd_dim1);
break;
default:
TEST_FAIL_MESSAGE("unknown pre_tfrmd_layout");
break;
}
status = zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() status is %08x (%s) "
"but expects %08x (%s))",
status, zdnn_get_status_message(status), ZDNN_OK,
zdnn_get_status_message(ZDNN_OK));
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim4 == expected_dim4,
"tfrmd_desc.dim4 (%u) doesn't match expected (%u)", tfrmd_desc.dim4,
expected_dim4);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim3 == expected_dim3,
"tfrmd_desc.dim3 (%u) doesn't match expected (%u)", tfrmd_desc.dim3,
expected_dim3);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim2 == expected_dim2,
"tfrmd_desc.dim2 (%u) doesn't match expected (%u)", tfrmd_desc.dim2,
expected_dim2);
TEST_ASSERT_MESSAGE_FORMATTED(
tfrmd_desc.dim1 == expected_dim1,
"tfrmd_desc.dim1 (%u) doesn't match expected (%u)", tfrmd_desc.dim1,
expected_dim1);
}
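/*
  Illustrative sketch (not called by any test): how the expected concatenated
  dim1 above is derived. Each gate's innermost dimension is padded up to a
  whole stick before the gates are laid out back to back. The 64 used below
  is an assumption standing in for AIU_2BYTE_CELLS_PER_STICK; e.g. with the
  hidden size of 16 used by test_tfrmd_dims_lstm_biases() further down and
  4 LSTM gates, this gives 1 * 64 * 4 = 256.
*/
static inline uint32_t example_expected_concat_dim1(uint32_t dim1,
                                                    uint8_t num_concats) {
  uint32_t cells_per_stick = 64; // assumed value of AIU_2BYTE_CELLS_PER_STICK
  // pad dim1 up to a multiple of cells_per_stick, then repeat per gate
  uint32_t padded =
      ((dim1 + cells_per_stick - 1) / cells_per_stick) * cells_per_stick;
  return padded * num_concats;
}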
void test_tfrmd_dims_nhwc_1() {
test_tfrmd_dims(ZDNN_NHWC, 1, 1, 1, 3, 1, 1, 1, 3);
}
void test_tfrmd_dims_nhwc_2() {
test_tfrmd_dims(ZDNN_NHWC, 4, 3, 2, 7, 4, 3, 2, 7);
}
void test_tfrmd_dims_4d() { test_tfrmd_dims(ZDNN_4D, 2, 3, 2, 3, 2, 3, 2, 3); }
void test_tfrmd_dims_3ds_1() {
test_tfrmd_dims(ZDNN_3DS, 0, 5, 1, 3, 5, 1, 1, 3);
}
void test_tfrmd_dims_3ds_2() {
test_tfrmd_dims(ZDNN_3DS, 0, 3, 4, 2, 3, 1, 4, 2);
}
void test_tfrmd_dims_3d() {
test_tfrmd_dims(ZDNN_3D, 0, 16, 32, 5, 1, 16, 32, 5);
}
void test_tfrmd_dims_2ds() {
test_tfrmd_dims(ZDNN_2DS, 0, 0, 4, 2, 4, 1, 1, 2);
}
void test_tfrmd_dims_2d() { test_tfrmd_dims(ZDNN_2D, 0, 0, 2, 5, 1, 1, 2, 5); }
void test_tfrmd_dims_1d() { test_tfrmd_dims(ZDNN_1D, 0, 0, 0, 5, 1, 1, 1, 5); }
void test_tfrmd_dims_lstm_biases() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_tfrmd_concat_dims(ZDNN_2DS, 0, 2, 16,
RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j]);
}
}
}
void test_tfrmd_dims_lstm_no_vconcat_weights() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_tfrmd_concat_dims(ZDNN_3DS, 2, 15, 72,
RNN_TYPE_LSTM | no_vconcat_infos[i]);
}
}
void test_tfrmd_dims_lstm_prev_bidir_weights() {
test_tfrmd_concat_dims(ZDNN_3DS, 2, 20, 72,
RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS);
}
void test_tfrmd_dims_gru_biases() {
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_tfrmd_concat_dims(ZDNN_2DS, 0, 2, 16,
RNN_TYPE_GRU | prev_layers[i] | biases_usages[j]);
}
}
}
void test_tfrmd_dims_gru_no_vconcat_weights() {
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_tfrmd_concat_dims(ZDNN_3DS, 2, 15, 72,
RNN_TYPE_GRU | no_vconcat_infos[i]);
}
}
void test_tfrmd_dims_gru_prev_bidir_weights() {
test_tfrmd_concat_dims(ZDNN_3DS, 2, 20, 72,
RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS);
}
void test_concat_weights_dim2(zdnn_concat_info info, uint32_t dim3,
uint32_t dim2, uint32_t dim1,
zdnn_status exp_status) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_status status;
zdnn_init_pre_transformed_desc(ZDNN_3DS, test_datatype, &pre_tfrmd_desc, dim3,
dim2, dim1);
status = zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"zdnn_generate_transformed_desc_concatenated("
") unexpected status (status = %08x, "
"expects = %08x)",
status, exp_status);
}
void test_tfrmd_dims_lstm_no_vconcat_weights_odd_dim2_pass() {
test_concat_weights_dim2(RNN_TYPE_LSTM | USAGE_WEIGHTS | PREV_LAYER_UNI, 3, 9,
10, ZDNN_OK);
}
void test_tfrmd_dims_lstm_prev_bidir_weights_odd_dim2_fail() {
test_concat_weights_dim2(PREV_LAYER_BIDIR | RNN_TYPE_LSTM | USAGE_WEIGHTS, 3,
9, 10, ZDNN_INVALID_SHAPE);
}
void test_tfrmd_dims_gru_no_vconcat_weights_odd_dim2_pass() {
  test_concat_weights_dim2(RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_UNI, 3, 9,
10, ZDNN_OK);
}
void test_tfrmd_dims_gru_prev_bidir_weights_odd_dim2_fail() {
test_concat_weights_dim2(RNN_TYPE_GRU | USAGE_WEIGHTS | PREV_LAYER_BIDIR, 3,
9, 10, ZDNN_INVALID_SHAPE);
}
void test_tfrmd_dims_4ds_uni_rnn_output() {
test_tfrmd_dims(ZDNN_4DS, 2, 1, 3, 4, 2, 1, 3, 4);
}
void test_tfrmd_dims_4ds_bidir_rnn_output() {
test_tfrmd_dims(ZDNN_4DS, 2, 2, 3, 4, 2, 1, 3, 128);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_4d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_3ds_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_3ds_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_3d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_2ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_2d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_1d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_lstm_biases);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_lstm_no_vconcat_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_lstm_prev_bidir_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_gru_biases);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_gru_no_vconcat_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_gru_prev_bidir_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_tfrmd_dims_lstm_no_vconcat_weights_odd_dim2_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_tfrmd_dims_lstm_prev_bidir_weights_odd_dim2_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_tfrmd_dims_gru_no_vconcat_weights_odd_dim2_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_tfrmd_dims_gru_prev_bidir_weights_odd_dim2_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_4ds_uni_rnn_output);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_tfrmd_dims_4ds_bidir_rnn_output);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_unstickify.c 0000664 0000000 0000000 00000111733 15000221702 0020230 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "testsupport.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
//=================================================================================================
// tests for unstickify
/*
Use 1x4x4x1 as example:
1) Create the input tensor descriptor
2) Create the raw (i.e., dense) input tensor data with random
FP16/FP32/BFLOAT values 1 >= x > SMALLEST_RANDOM_FP.
For 1x4x4x1 we have 16 elements.
3) Create a zTensor with that.
4a) If caller wants to use offsets, we'll "stickify" the
input tensor data by putting things in ztensor.buffer directly:
stick_area[offsets[n] = fp16_to_dlf16(input_data[n]).
4b) If no_offsets, we'll use the official stickify routine.
5) Send that zTensor to unstickify, result goes to "data_unstickified"
6) compare the raw input tensor data against that "data_unstickified" array.
The rationale is since we're using random FP data, if there's something wrong
with the unstickify routine then it's very unlikely to match 100% with the
raw input data.
*/
void test_unstickify(uint32_t dim4, uint32_t dim3, uint32_t dim2, uint32_t dim1,
zdnn_data_layouts layout, bool no_offsets) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
void *data, *data_unstickified;
switch (layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
dim1);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim2,
dim1);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim3,
dim2, dim1);
break;
default:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc, dim4,
dim3, dim2, dim1);
}
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed (status = %08x)", status);
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
data = create_and_fill_random_fp_data(&ztensor);
data_unstickified =
malloc(num_elements * get_data_type_size(pre_tfrmd_desc.type));
if (no_offsets) {
// Stickify tensor using the official API
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_ztensor failed (status = %08x)",
status);
} else {
// "stickify" by converting input values to DLFLOAT16s and writing directly
// to the ztensor's buffer.
size_t *offsets;
if (layout != ZDNN_4DS) {
offsets = alloc_offsets(&ztensor);
} else {
offsets = alloc_rnn_output_offsets(&ztensor);
}
for (uint64_t i = 0; i < num_elements; i++) {
uint16_t stickified_input_value = 0;
switch (test_datatype) {
case BFLOAT:
stickified_input_value = cnvt_1_bfloat_to_dlf16(((uint16_t *)data)[i]);
break;
case FP16:
stickified_input_value = cnvt_1_fp16_to_dlf16(((uint16_t *)data)[i]);
break;
case FP32:
stickified_input_value = cnvt_1_fp32_to_dlf16(((float *)data)[i]);
break;
default:
TEST_FAIL_MESSAGE("Unsupported data type");
free(data_unstickified);
return;
}
// offsets[i] is in # of bytes
// ztensor.buffer is void*
// stickified_input_value is uint16_t
*(uint16_t *)((uintptr_t)(ztensor.buffer) + offsets[i]) =
stickified_input_value;
}
free(offsets);
// hack, since we never actually stickified anything
ztensor.is_transformed = true;
}
status = zdnn_transform_origtensor(&ztensor, data_unstickified);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_transform_origtensor failed (status = %08x)",
status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
dumpdata_origtensor(&pre_tfrmd_desc, data, AS_FLOAT);
dumpdata_ztensor(&ztensor, AS_FLOAT, false);
dumpdata_origtensor(&pre_tfrmd_desc, data_unstickified, AS_FLOAT);
}
char *error_fmt = "Incorrect value at element %" PRIu64 ": Unstickified: "
"%.6f, Expected: %.6f";
// the zdnn_transform_origtensor() values went through a
// FP16/32/BFLOAT16 -> DLFLOAT16 -> FP16/32/BFLOAT16 roundtrip, so we can't
// just compare them with like a memcmp() because we could have lost precision
// during the process
for (uint64_t i = 0; i < num_elements; i++) {
switch (test_datatype) {
case BFLOAT: {
// raw tensor value, this is the "expected" value
uint16_t data_val = ((uint16_t *)data)[i];
// BFLOAT -> DLFLOAT16 -> BFLOAT roundtrip'd tensor
// value
uint16_t data_unstickified_val = ((uint16_t *)data_unstickified)[i];
TEST_ASSERT_MESSAGE_FORMATTED(
almost_equal_bfloat(data_unstickified_val, data_val), error_fmt, i,
cnvt_1_bfloat_to_fp32(data_unstickified_val),
cnvt_1_bfloat_to_fp32(data_val));
break;
}
case FP16: {
// raw tensor value
uint16_t data_val = ((uint16_t *)data)[i];
// FP16 -> DLFLOAT16 -> FP16 roundtrip'd tensor value
uint16_t data_unstickified_val = ((uint16_t *)data_unstickified)[i];
TEST_ASSERT_MESSAGE_FORMATTED(
almost_equal_fp16(data_unstickified_val, data_val), error_fmt, i,
cnvt_1_fp16_to_fp32(data_unstickified_val),
cnvt_1_fp16_to_fp32(data_val));
break;
}
case FP32: {
// raw tensor value
float data_val = ((float *)data)[i];
// FP32 -> DLFLOAT16 -> FP32 roundtrip'd tensor value
float data_unstickified_val = ((float *)data_unstickified)[i];
TEST_ASSERT_MESSAGE_FORMATTED(
almost_equal_float(data_unstickified_val, data_val), error_fmt, i,
data_unstickified_val, data_val);
break;
}
default:
TEST_FAIL_MESSAGE("Unsupported data type");
return;
}
}
free(data);
free(data_unstickified);
zdnn_free_ztensor_buffer(&ztensor);
}
/**************************************************************
* NHWC
**************************************************************/
/*
* Tensor with 16 entries, NHWC
* 1,4,4,1 NHWC will use one cell per stick, 4 sticks per page and a total of
* 4 pages.
*/
void test_nhwc_1x4x4x1() { test_unstickify(1, 4, 4, 1, ZDNN_NHWC, false); };
void test_nhwc_1x4x4x2() { test_unstickify(1, 4, 4, 2, ZDNN_NHWC, false); };
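/*
  Illustrative sketch (not called by the tests): rough page accounting behind
  the layout comments in this section. It assumes a 2-byte element type with
  64 cells per stick and 32 sticks per 4K page; the library's own AIU_*
  constants are authoritative. Under those assumptions,
  example_nhwc_pages(1, 4, 4, 1) == 4 and example_nhwc_pages(1, 32, 32, 1) ==
  32, matching the comments here.
*/
static inline uint64_t example_nhwc_pages(uint32_t n, uint32_t h, uint32_t w,
                                          uint32_t c) {
  // pages per (N,H) plane: the W index picks one of the (assumed) 32 sticks
  // in a 4K page, and every 64-element chunk of C needs its own set of pages
  uint64_t pages_per_plane =
      (((uint64_t)w + 31) / 32) * (((uint64_t)c + 63) / 64);
  return (uint64_t)n * h * pages_per_plane;
}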
/*
* Tensor with 1024 entries, NHWC
* 1,32,32,1 NHWC will use 1 cell per stick, all sticks in the page,
* and 32 pages.
*/
void test_nhwc_1x32x32x1() { test_unstickify(1, 32, 32, 1, ZDNN_NHWC, false); };
void test_nhwc_1x32x32x2() { test_unstickify(1, 32, 32, 2, ZDNN_NHWC, false); };
void test_nhwc_1x32x32x3() { test_unstickify(1, 32, 32, 3, ZDNN_NHWC, false); };
void test_nhwc_1x1x2x1() { test_unstickify(1, 1, 2, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x2x2() { test_unstickify(1, 1, 2, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x2x4() { test_unstickify(1, 1, 2, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x2x7() { test_unstickify(1, 1, 2, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x4x1() { test_unstickify(1, 1, 4, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x4x2() { test_unstickify(1, 1, 4, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x4x4() { test_unstickify(1, 1, 4, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x4x7() { test_unstickify(1, 1, 4, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x7x1() { test_unstickify(1, 1, 7, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x7x2() { test_unstickify(1, 1, 7, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x7x4() { test_unstickify(1, 1, 7, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x7x7() { test_unstickify(1, 1, 7, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x8x1() { test_unstickify(1, 1, 8, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x8x2() { test_unstickify(1, 1, 8, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x8x4() { test_unstickify(1, 1, 8, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x8x7() { test_unstickify(1, 1, 8, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x13x1() { test_unstickify(1, 1, 13, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x13x2() { test_unstickify(1, 1, 13, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x13x4() { test_unstickify(1, 1, 13, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x13x7() { test_unstickify(1, 1, 13, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x100x1() { test_unstickify(1, 1, 100, 1, ZDNN_NHWC, false); };
void test_nhwc_1x1x100x2() { test_unstickify(1, 1, 100, 2, ZDNN_NHWC, false); };
void test_nhwc_1x1x100x4() { test_unstickify(1, 1, 100, 4, ZDNN_NHWC, false); };
void test_nhwc_1x1x100x7() { test_unstickify(1, 1, 100, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x2x1() { test_unstickify(2, 3, 2, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x2x2() { test_unstickify(2, 3, 2, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x2x4() { test_unstickify(2, 3, 2, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x2x7() { test_unstickify(2, 3, 2, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x4x1() { test_unstickify(2, 3, 4, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x4x2() { test_unstickify(2, 3, 4, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x4x4() { test_unstickify(2, 3, 4, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x4x7() { test_unstickify(2, 3, 4, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x7x1() { test_unstickify(2, 3, 7, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x7x2() { test_unstickify(2, 3, 7, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x7x4() { test_unstickify(2, 3, 7, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x7x7() { test_unstickify(2, 3, 7, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x8x1() { test_unstickify(2, 3, 8, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x8x2() { test_unstickify(2, 3, 8, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x8x4() { test_unstickify(2, 3, 8, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x8x7() { test_unstickify(2, 3, 8, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x13x1() { test_unstickify(2, 3, 13, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x13x2() { test_unstickify(2, 3, 13, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x13x4() { test_unstickify(2, 3, 13, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x13x7() { test_unstickify(2, 3, 13, 7, ZDNN_NHWC, false); };
void test_nhwc_2x3x100x1() { test_unstickify(2, 3, 100, 1, ZDNN_NHWC, false); };
void test_nhwc_2x3x100x2() { test_unstickify(2, 3, 100, 2, ZDNN_NHWC, false); };
void test_nhwc_2x3x100x4() { test_unstickify(2, 3, 100, 4, ZDNN_NHWC, false); };
void test_nhwc_2x3x100x7() { test_unstickify(2, 3, 100, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x2x1() { test_unstickify(3, 2, 2, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x2x2() { test_unstickify(3, 2, 2, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x2x4() { test_unstickify(3, 2, 2, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x2x7() { test_unstickify(3, 2, 2, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x4x1() { test_unstickify(3, 2, 4, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x4x2() { test_unstickify(3, 2, 4, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x4x4() { test_unstickify(3, 2, 4, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x4x7() { test_unstickify(3, 2, 4, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x7x1() { test_unstickify(3, 2, 7, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x7x2() { test_unstickify(3, 2, 7, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x7x4() { test_unstickify(3, 2, 7, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x7x7() { test_unstickify(3, 2, 7, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x8x1() { test_unstickify(3, 2, 8, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x8x2() { test_unstickify(3, 2, 8, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x8x4() { test_unstickify(3, 2, 8, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x8x7() { test_unstickify(3, 2, 8, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x13x1() { test_unstickify(3, 2, 13, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x13x2() { test_unstickify(3, 2, 13, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x13x4() { test_unstickify(3, 2, 13, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x13x7() { test_unstickify(3, 2, 13, 7, ZDNN_NHWC, false); };
void test_nhwc_3x2x100x1() { test_unstickify(3, 2, 100, 1, ZDNN_NHWC, false); };
void test_nhwc_3x2x100x2() { test_unstickify(3, 2, 100, 2, ZDNN_NHWC, false); };
void test_nhwc_3x2x100x4() { test_unstickify(3, 2, 100, 4, ZDNN_NHWC, false); };
void test_nhwc_3x2x100x7() { test_unstickify(3, 2, 100, 7, ZDNN_NHWC, false); };
void test_nhwc_1x1x1xe1(int e1) {
test_unstickify(1, 1, 1, e1, ZDNN_NHWC, false);
}
void test_nhwc_1x1x1x4() { test_nhwc_1x1x1xe1(4); }
void test_nhwc_1x1x1x5() { test_nhwc_1x1x1xe1(5); }
void test_nhwc_1x1x1x8() { test_nhwc_1x1x1xe1(8); }
void test_nhwc_1x1x1x9() { test_nhwc_1x1x1xe1(9); }
void test_nhwc_1x1x1x63() { test_nhwc_1x1x1xe1(63); }
void test_nhwc_1x1x1x64() { test_nhwc_1x1x1xe1(64); }
void test_nhwc_1x1x1x65() { test_nhwc_1x1x1xe1(65); }
void test_nhwc_1x1x1x127() { test_nhwc_1x1x1xe1(127); }
void test_nhwc_1x1x1x128() { test_nhwc_1x1x1xe1(128); }
/*
* Tensor with 16 entries, 3DS
* 4,4,1 3DS will use one cell per stick, 4 sticks per page and a total of 4
* pages.
*/
void test_3ds_4x4x1() {
// first entry doesn't matter
test_unstickify(9999, 4, 4, 1, ZDNN_3DS, false);
}
/*
* Tensor with 3072 entries, 3DS
* 32,32,3 3DS will use 3 cells per stick, all sticks in the page,
* and 32 pages.
*/
void test_3ds_32x32x3() {
// first entry doesn't matter
test_unstickify(9999, 32, 32, 3, ZDNN_3DS, false);
}
/*
* Tensor with 8 entries, 2DS
* 4,2 2DS will use two cells per stick, (implied 1 stick per page) and a
* total of 4 pages.
*/
void test_2ds_4x2() {
// first two entries don't matter in 2DS
test_unstickify(9999, 9999, 4, 2, ZDNN_2DS, false);
}
/*
* Tensor with 4k entries, 2DS
* We expect this to require 4 pages total. Each dim2 will require 2 pages.
* The first page will have all 64 cells of all 32 sticks filled holding 2048
* values. A second page will have 1 stick with 1 cell filled to hold val
* 2049.
*/
void test_2ds_2x2049() {
// first two entries don't matter in 2DS
test_unstickify(9999, 9999, 2, 2049, ZDNN_2DS, false);
}
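/*
 * Illustrative arithmetic for the 2x2049 case above, assuming 64 2-byte cells
 * per stick and 32 sticks per 4K page:
 *   sticks per dim2 row = CEIL(2049, 64) = 33   (2048 values + 1 spill-over)
 *   pages  per dim2 row = CEIL(33, 32)   = 2
 *   total pages         = 2 rows * 2     = 4
 */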
/**************************************************************
* NCHW
**************************************************************/
void test_nchw_1x1x4x4() { test_unstickify(1, 1, 4, 4, ZDNN_NCHW, 0); }
void test_nchw_1x4x2x3() { test_unstickify(1, 4, 2, 3, ZDNN_NCHW, 0); }
void test_nchw_1x3x32x32() { test_unstickify(1, 3, 32, 32, ZDNN_NCHW, 0); }
void test_nchw_2x129x3x33() { test_unstickify(2, 129, 3, 33, ZDNN_NCHW, 0); }
void test_nchw_1x64x1x31() { test_unstickify(1, 64, 1, 31, ZDNN_NCHW, 0); }
void test_nchw_1x64x1x32() { test_unstickify(1, 64, 1, 32, ZDNN_NCHW, 0); }
void test_nchw_1x64x1x33() { test_unstickify(1, 64, 1, 33, ZDNN_NCHW, 0); }
void test_nchw_1x63x1x32() { test_unstickify(1, 63, 1, 32, ZDNN_NCHW, 0); }
void test_nchw_1x65x1x32() { test_unstickify(1, 65, 1, 32, ZDNN_NCHW, 0); }
void test_nchw_1x127x1x4() { test_unstickify(1, 127, 1, 4, ZDNN_NCHW, 0); }
void test_nchw_1x128x1x4() { test_unstickify(1, 128, 1, 4, ZDNN_NCHW, 0); }
void test_nchw_1x129x1x4() { test_unstickify(1, 129, 1, 4, ZDNN_NCHW, 0); }
void test_nchw_1x4x1x63() { test_unstickify(1, 4, 1, 63, ZDNN_NCHW, 0); }
void test_nchw_1x4x1x64() { test_unstickify(1, 4, 1, 64, ZDNN_NCHW, 0); }
void test_nchw_1x4x1x65() { test_unstickify(1, 4, 1, 65, ZDNN_NCHW, 0); }
/**************************************************************
* RNN OUTPUT
**************************************************************/
void test_rnn_output_5x1x4x3() { test_unstickify(5, 1, 4, 3, ZDNN_4DS, 0); }
void test_rnn_output_1x1x4x3() { test_unstickify(1, 1, 4, 3, ZDNN_4DS, 0); }
void test_rnn_output_5x1x4x64() { test_unstickify(5, 1, 4, 64, ZDNN_4DS, 0); }
void test_rnn_output_1x1x4x64() { test_unstickify(1, 1, 4, 64, ZDNN_4DS, 0); }
void test_rnn_output_5x1x4x65() { test_unstickify(5, 1, 4, 65, ZDNN_4DS, 0); }
void test_rnn_output_1x1x4x65() { test_unstickify(1, 1, 4, 65, ZDNN_4DS, 0); }
void test_rnn_output_5x1x31x5() { test_unstickify(5, 1, 31, 5, ZDNN_4DS, 0); }
void test_rnn_output_1x1x31x5() { test_unstickify(1, 1, 31, 5, ZDNN_4DS, 0); }
void test_rnn_output_5x1x60x5() { test_unstickify(5, 1, 60, 5, ZDNN_4DS, 0); }
void test_rnn_output_1x1x60x5() { test_unstickify(1, 1, 60, 5, ZDNN_4DS, 0); }
void test_rnn_output_5x2x4x3() { test_unstickify(5, 2, 4, 3, ZDNN_4DS, 0); }
void test_rnn_output_1x2x4x3() { test_unstickify(1, 2, 4, 3, ZDNN_4DS, 0); }
void test_rnn_output_5x2x4x64() { test_unstickify(5, 2, 4, 64, ZDNN_4DS, 0); }
void test_rnn_output_1x2x4x64() { test_unstickify(1, 2, 4, 64, ZDNN_4DS, 0); }
void test_rnn_output_5x2x4x65() { test_unstickify(5, 2, 4, 65, ZDNN_4DS, 0); }
void test_rnn_output_1x2x4x65() { test_unstickify(1, 2, 4, 65, ZDNN_4DS, 0); }
void test_rnn_output_5x2x31x5() { test_unstickify(5, 2, 31, 5, ZDNN_4DS, 0); }
void test_rnn_output_1x2x31x5() { test_unstickify(1, 2, 31, 5, ZDNN_4DS, 0); }
void test_rnn_output_5x2x60x5() { test_unstickify(5, 2, 60, 5, ZDNN_4DS, 0); }
void test_rnn_output_1x2x60x5() { test_unstickify(1, 2, 60, 5, ZDNN_4DS, 0); }
void test_unstickify_4dfeature_twice() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
zdnn_init_pre_transformed_desc(ZDNN_NHWC, test_datatype, &pre_tfrmd_desc, 1,
4, 4, 1);
status = zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc() failed (status = %08x)", status);
status =
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_init_ztensor_with_malloc() failed (status = %08x)", status);
unsigned char *data_unstickified =
malloc(get_num_elements(&ztensor, ELEMENTS_PRE) *
get_data_type_size(pre_tfrmd_desc.type));
ztensor.is_transformed = true; // hack, since we never actually
// stickified anything
status = zdnn_transform_origtensor(&ztensor, data_unstickified);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"First unstickify: expected status = %08x, actual status = %08x", ZDNN_OK,
status);
// second one should still be OK
status = zdnn_transform_origtensor(&ztensor, data_unstickified);
  TEST_ASSERT_MESSAGE_FORMATTED(
      status == ZDNN_OK,
      "Second unstickify: expected status = %08x, actual status = %08x",
      ZDNN_OK, status);
  free(data_unstickified);
  zdnn_free_ztensor_buffer(&ztensor);
}
void test_stickify_unstickify(uint32_t dim4, uint32_t dim3, uint32_t dim2,
uint32_t dim1, zdnn_data_layouts layout) {
test_unstickify(dim4, dim3, dim2, dim1, layout, true);
}
/*
* Tensor with 16 entries, NHWC
* 1,4,4,1 NHWC will use one cell per stick, 4 sticks per page and a total of
* 4 pages.
*/
//
void test_stickify_unstickify_nhwc_1x4x4x1() {
test_stickify_unstickify(1, 4, 4, 1, ZDNN_NHWC);
}
void test_stickify_unstickify_nhwc_1x4x4x2() {
test_stickify_unstickify(1, 4, 4, 2, ZDNN_NHWC);
}
/*
* Tensor with 3072 entries, NHWC
* 1,32,32,1 NHWC will use 1 cell per stick, all sticks in the page,
* and 32 pages.
*/
//
void test_stickify_unstickify_nhwc_1x32x32x1() {
test_stickify_unstickify(1, 32, 32, 1, ZDNN_NHWC);
}
void test_stickify_unstickify_nhwc_1x32x32x2() {
test_stickify_unstickify(1, 32, 32, 2, ZDNN_NHWC);
}
void test_stickify_unstickify_nhwc_1x32x32x3() {
test_stickify_unstickify(1, 32, 32, 3, ZDNN_NHWC);
}
void test_stickify_unstickify_nhwc_1x2x33x65() {
test_stickify_unstickify(1, 2, 33, 65, ZDNN_NHWC);
}
void test_stickify_unstickify_nchw_1x4x4x1() {
test_stickify_unstickify(1, 4, 4, 1, ZDNN_NCHW);
}
void test_stickify_unstickify_nchw_1x32x32x3() {
test_stickify_unstickify(1, 32, 32, 3, ZDNN_NCHW);
}
void test_stickify_unstickify_nchw_1x2x33x65() {
test_stickify_unstickify(1, 2, 33, 65, ZDNN_NCHW);
}
// This routine tests the conversion from DLF to FP16.
// Input: a "bad" value in DLFloat, which will "trip" the
// floating point exception trigger on VCFN
void test_ztensor_bad_value_FP16(uint16_t bad_value) {
#define TOO_LARGE_DLF16_POS 0x7E00
#define TOO_LARGE_DLF16_NEG 0xFE00
#define TOO_SMALL_DLF16_POS 0x0001
#define TOO_SMALL_DLF16_NEG 0x8001
// Note: Ninf = "NaN or INF"
#define NINF_DLF16_POS 0x7FFF
#define NINF_DLF16_NEG 0xFFFF
#define STICK_ENTRIES_FP16 7
const uint32_t stick_entries_to_try[STICK_ENTRIES_FP16] = {0, 1, 7, 8,
9, 62, 63};
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
zdnn_status status;
uint16_t *array; // Alternate view on the stickified_data (ztensor.buffer)
unsigned char *unstickified_data;
// Build a transformed ztensor with valid data
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP16, &pre_tfrmd_desc, 1, 1, 1, 64);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
// Transform the data to an is_stickified ztensor, so we can test
// unstickification later
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_transform_ztensor failed (status = %08x)",
status);
// Create an area to unstickify/convert back to
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
zdnn_data_types dtype = ztensor.pre_transformed_desc->type;
unstickified_data = malloc(num_elements * get_data_type_size(dtype));
array = (uint16_t *)ztensor.buffer; /* use stickified_data as an array */
for (int i = 0; i < STICK_ENTRIES_FP16; i++) {
array[stick_entries_to_try[i]] = bad_value;
status = zdnn_transform_origtensor(&ztensor, unstickified_data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_CONVERT_FAILURE,
"zdnn_transform_origtensor() succeeded (status = %08x, expects = "
"%08x, i = %d, value = %04x)",
status, ZDNN_CONVERT_FAILURE, i, bad_value);
array[stick_entries_to_try[i]] = 0; // set entry to 0 for next iteration
}
// Free allocated storage
free(data);
free(unstickified_data);
zdnn_free_ztensor_buffer(&ztensor);
}
// Test unstickify conversions DLFloat to FP16 (VCFN)
void test_ztensor_fp16_bad_values() {
  test_ztensor_bad_value_FP16(
      TOO_LARGE_DLF16_POS); // too large in magnitude for FP16, causes overflow
  test_ztensor_bad_value_FP16(
      TOO_LARGE_DLF16_NEG); // too large in magnitude for FP16, causes overflow
// TODO:
// The following look valid in the documentation, but do not happen on test
// system at this time
// test_ztensor_bad_value_FP16(
// TOO_SMALL_DLF16_POS); // is not a number, will cause overflow
// test_ztensor_bad_value_FP16(
// TOO_SMALL_DLF16_NEG); // is not a number, will cause overflow
test_ztensor_bad_value_FP16(
NINF_DLF16_POS); // is not a number, will cause invalid op
test_ztensor_bad_value_FP16(NINF_DLF16_NEG); // is not a number, will cause
// invalid op
}
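/*
  Illustrative sketch (not called by the tests): why the values above trip
  VCFN. DLFLOAT16 is assumed here to be 1 sign bit, 6 exponent bits with a
  bias of 31, and 9 fraction bits, with the all-ones exponent/fraction
  pattern reserved as NINF ("NaN or INF"). Under that reading 0x7E00 decodes
  to roughly 2^32, far beyond FP16's maximum of 65504 (overflow), while
  0x7FFF decodes to NINF (invalid operation).
*/
static inline void example_decode_dlf16(uint16_t v, int *sign, int *exponent,
                                        int *fraction) {
  *sign = (v >> 15) & 0x1;     // bit 15
  *exponent = (v >> 9) & 0x3F; // bits 14-9, assumed bias of 31
  *fraction = v & 0x1FF;       // bits 8-0
}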
// This routine tests the conversion from DLF to FP32.
// Input: a "bad" value in DLFloat, which will "trip" the
// floating point exception trigger on VCLFNH/VCLFNL
// NOTE: Only Not-A-Number values will trip the exception.
// "Anything DLFLOAT16 can represent, FP32 can do better." -TinTo
void test_ztensor_bad_value_FP32(uint16_t bad_value) {
#define NAN_DL16_POS 0x7FFF
#define NAN_DL16_NEG 0xFFFF
#define STICK_ENTRIES_FP32 9
const uint32_t stick_entries_to_try[STICK_ENTRIES_FP32] = {0, 1, 3, 4, 7,
8, 9, 15, 63};
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
unsigned char *data;
uint16_t *array;
zdnn_status status;
unsigned char *unstickified_data;
// Build a transformed ztensor with valid data
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 64);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
data = create_and_fill_random_fp_data(&ztensor);
// Transform the data to an stickified ztensor, so we can test
// unstickification later
status = zdnn_transform_ztensor(&ztensor, data);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_transform_ztensor failed (status = %08x)",
status);
// Create an area to unstickify/convert back to
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
zdnn_data_types dtype = ztensor.pre_transformed_desc->type;
unstickified_data = malloc(num_elements * get_data_type_size(dtype));
array = (uint16_t *)ztensor.buffer; /* use stickified_data as an array */
zdnn_status expected_status;
// Calculate STRIDE_N_SIZE for the tensor. When STRIDE_N_SIZE >
// STICK_SW_THRESHOLD use hardware stickification otherwise stay in software
// stickification as this shows the greatest performance benefit.
uint64_t STRIDE_N_SIZE =
((uint64_t)tfrmd_desc.dim3 * (uint64_t)tfrmd_desc.dim2 *
(uint64_t)tfrmd_desc.dim1);
// Check if hardware will handle the transformation
if ((zdnn_is_nnpa_function_installed(1, NNPA_TRANSFORM) == true) &&
(STRIDE_N_SIZE > STICK_SW_THRESHOLD)) {
expected_status = ZDNN_ELEMENT_RANGE_VIOLATION;
} else {
expected_status = ZDNN_CONVERT_FAILURE;
}
for (int i = 0; i < STICK_ENTRIES_FP32; i++) {
array[stick_entries_to_try[i]] = bad_value;
status = zdnn_transform_origtensor(&ztensor, unstickified_data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"zdnn_transform_origtensor() succeeded (status = %08x, expects = "
"%08x, i = %d, value = %04x)",
status, expected_status, i, bad_value);
array[stick_entries_to_try[i]] = 0; // set entry to 0 for next iteration
}
// Free allocated storage
free(data);
free(unstickified_data);
zdnn_free_ztensor_buffer(&ztensor);
}
// Test unstickify conversions DLFloat to FP32 (VCLFNH/VCLFNL)
void test_ztensor_fp32_bad_values() {
// too large or too small not possible,
  test_ztensor_bad_value_FP32(
      NAN_DL16_POS); // is not a number, will cause invalid op
  test_ztensor_bad_value_FP32(
      NAN_DL16_NEG); // is not a number, will cause invalid op
}
// Test unstickify invalid transform type
void test_unstickify_transform_desc_invalid_type() {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
zdnn_status status;
unsigned char *unstickified_data;
// Create descriptors and ztensor
// For test, pre_transformed desc must be valid. All other transformed desc
// options must be valid. Type will be changed.
zdnn_init_pre_transformed_desc(ZDNN_NHWC, FP32, &pre_tfrmd_desc, 1, 1, 1, 64);
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor_with_malloc(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
  // Allocate storage for unstickified data. It isn't strictly needed for this
  // test, but if the expected failure status doesn't occur the unstickify call
  // will write into this space, so it must be a real allocation or the test
  // could crash.
uint64_t num_elements = get_num_elements(&ztensor, ELEMENTS_PRE);
unstickified_data =
malloc(num_elements * get_data_type_size(ztensor.transformed_desc->type));
// Set is_transformed to true as this check occurs prior to type check
ztensor.is_transformed = true;
// Update type to an invalid type.
ztensor.transformed_desc->type = test_datatype;
status = zdnn_transform_origtensor(&ztensor, unstickified_data);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_INVALID_TYPE,
"zdnn_transform_origtensor() unexpected status (status = %08x, "
"expects = %08x)",
status, ZDNN_INVALID_TYPE);
free(unstickified_data);
zdnn_free_ztensor_buffer(&ztensor);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x4x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_2x3x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x2x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x4x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x7x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x8x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x13x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_3x2x100x7);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x8);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x9);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x127);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nhwc_1x1x1x128);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3ds_4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_3ds_32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2ds_4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_2ds_2x2049);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x1x4x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x2x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x3x32x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_2x129x3x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x63x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x31);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x64x1x33);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x65x1x32);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x127x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x128x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x129x1x4);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x63);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_nchw_1x4x1x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x1x4x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x1x4x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x1x4x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x1x4x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x1x4x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x1x4x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x1x31x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x1x31x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x1x60x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x1x60x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x2x4x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x2x4x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x2x4x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x2x4x64);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x2x4x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x2x4x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x2x31x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x2x31x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_5x2x60x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_rnn_output_1x2x60x5);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x4x4x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x32x32x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x32x32x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nhwc_1x2x33x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nchw_1x4x4x1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nchw_1x32x32x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_stickify_unstickify_nchw_1x2x33x65);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_unstickify_4dfeature_twice);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_unstickify_transform_desc_invalid_type);
RUN_TEST(test_ztensor_fp16_bad_values);
RUN_TEST(test_ztensor_fp32_bad_values);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_utils.c 0000664 0000000 0000000 00000023203 15000221702 0017172 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {}
void tearDown(void) {}
void test_num_elements(zdnn_data_layouts layout, uint32_t *shape,
uint64_t exp_pre, uint64_t exp_aiu) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
switch (layout) {
case ZDNN_1D:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0]);
break;
case ZDNN_2D:
case ZDNN_2DS:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0], shape[1]);
break;
case ZDNN_3D:
case ZDNN_3DS:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0], shape[1], shape[2]);
break;
default:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0], shape[1], shape[2], shape[3]);
break;
}
zdnn_generate_transformed_desc(&pre_tfrmd_desc, &tfrmd_desc);
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
// Get output from each mode
uint64_t num_elements_pre = get_num_elements(&ztensor, ELEMENTS_PRE);
uint64_t num_elements_aiu = get_num_elements(&ztensor, ELEMENTS_AIU);
// Check each mode's output matches the expected value.
TEST_ASSERT_MESSAGE_FORMATTED(
exp_pre == num_elements_pre,
"For %s tensor we expected %" PRIu64
" elements but ELEMENTS_PRE returned %" PRIu64 " elements",
get_data_layout_str(tfrmd_desc.layout), exp_pre, num_elements_pre);
TEST_ASSERT_MESSAGE_FORMATTED(
exp_aiu == num_elements_aiu,
"For %s tensor we expected %" PRIu64
" elements but ELEMENTS_AIU returned %" PRIu64 " elements",
get_data_layout_str(tfrmd_desc.layout), exp_aiu, num_elements_aiu);
}
void test_num_elements_concat(zdnn_data_layouts layout, zdnn_concat_info info,
uint32_t *shape, uint64_t exp_single_gate,
uint64_t exp_all_gates, uint64_t exp_aiu) {
zdnn_tensor_desc pre_tfrmd_desc, tfrmd_desc;
zdnn_ztensor ztensor;
switch (layout) {
case ZDNN_2DS:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0], shape[1]);
break;
case ZDNN_3DS:
zdnn_init_pre_transformed_desc(layout, test_datatype, &pre_tfrmd_desc,
shape[0], shape[1], shape[2]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED("invalid pre-transformed layout: %s",
get_data_layout_str(layout));
}
zdnn_generate_transformed_desc_concatenated(&pre_tfrmd_desc, info,
&tfrmd_desc);
zdnn_init_ztensor(&pre_tfrmd_desc, &tfrmd_desc, &ztensor);
// Get output from each mode
uint64_t num_elements_single_gate =
get_num_elements(&ztensor, ELEMENTS_PRE_SINGLE_GATE);
uint64_t num_elements_all_gates =
get_num_elements(&ztensor, ELEMENTS_PRE_ALL_GATES);
uint64_t num_elements_aiu = get_num_elements(&ztensor, ELEMENTS_AIU);
// Check each mode's output matches the expected value.
TEST_ASSERT_MESSAGE_FORMATTED(
num_elements_single_gate == exp_single_gate,
"For %s tensor we expected %" PRIu64
" elements but ELEMENTS_PRE_SINGLE_GATE returned %" PRIu64
" elements (info = %08x)",
get_data_layout_str(tfrmd_desc.layout), exp_single_gate,
num_elements_single_gate, info);
  TEST_ASSERT_MESSAGE_FORMATTED(
      num_elements_all_gates == exp_all_gates,
      "For %s tensor we expected %" PRIu64
      " elements but ELEMENTS_PRE_ALL_GATES returned %" PRIu64
      " elements (info = %08x)",
      get_data_layout_str(tfrmd_desc.layout), exp_all_gates,
      num_elements_all_gates, info);
  TEST_ASSERT_MESSAGE_FORMATTED(
      num_elements_aiu == exp_aiu,
      "For %s tensor we expected %" PRIu64
      " elements but ELEMENTS_AIU returned %" PRIu64 " elements (info = %08x)",
      get_data_layout_str(tfrmd_desc.layout), exp_aiu, num_elements_aiu, info);
}
/*
* Test to ensure get_num_elements works with a NHWC tensor.
*/
void get_num_elements_nhwc() {
uint32_t shape[] = {1, 4, 4, 1};
test_num_elements(ZDNN_NHWC, shape, 16, 16);
}
/*
* Test to ensure get_num_elements works with a 4D tensor.
*/
void get_num_elements_4d() {
uint32_t shape[] = {1, 32, 15, 5};
test_num_elements(ZDNN_4D, shape, 2400, 2400);
}
/*
* Test to ensure get_num_elements works with a 3DS tensor.
*/
void get_num_elements_3ds() {
uint32_t shape[] = {3, 4, 4};
test_num_elements(ZDNN_3DS, shape, 48, 48);
}
/*
* Test to ensure get_num_elements works with a 3D tensor.
*/
void get_num_elements_3d() {
uint32_t shape[] = {15, 4, 2};
test_num_elements(ZDNN_3D, shape, 120, 120);
}
/*
* Test to ensure get_num_elements works with a 2DS tensor.
*/
void get_num_elements_2ds() {
uint32_t shape[] = {4, 4};
test_num_elements(ZDNN_2DS, shape, 16, 16);
}
/*
* Test to ensure get_num_elements works with a 2D tensor.
*/
void get_num_elements_2d() {
uint32_t shape[] = {15, 4};
test_num_elements(ZDNN_2D, shape, 60, 60);
}
/*
* Test to ensure get_num_elements works with a 1D tensor.
*/
void get_num_elements_1d() {
uint32_t shape[] = {16};
test_num_elements(ZDNN_1D, shape, 16, 16);
}
/*
* Test to ensure get_num_elements works with a 3DS LSTM tensor that doesn't
* require vertical concatenation.
*/
void get_num_elements_lstm_no_vconcat_weights() {
uint32_t shape[] = {2, 3, 4};
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_num_elements_concat(ZDNN_3DS, RNN_TYPE_LSTM | no_vconcat_infos[i],
shape, 24, 96, 1536);
}
}
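/*
 * Illustrative arithmetic for the case above, assuming 64 cells per stick:
 * a {2, 3, 4} 3DS LSTM weight tensor transforms to
 * {2, 1, 3, 4 gates * (4 padded up to 64)} = {2, 1, 3, 256}, so
 * ELEMENTS_AIU = 2 * 1 * 3 * 256 = 1536, while one gate holds
 * 2 * 3 * 4 = 24 pre-transformed elements and all four gates hold 96.
 */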
/*
* Test to ensure get_num_elements works with a 3DS LSTM tensor that requires
* vertical concatenation.
*/
void get_num_elements_lstm_prev_bidir_weights() {
uint32_t shape[] = {2, 6, 4};
test_num_elements_concat(ZDNN_3DS,
RNN_TYPE_LSTM | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
shape, 48, 192, 65536);
}
/*
* Test to ensure get_num_elements works with a (hidden-)biases 2DS LSTM
* tensor.
*/
void get_num_elements_lstm_biases() {
uint32_t shape[] = {2, 3};
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_num_elements_concat(
ZDNN_2DS, RNN_TYPE_LSTM | prev_layers[i] | biases_usages[j], shape, 6,
24, 512);
}
}
}
/*
* Test to ensure get_num_elements works with a 3DS GRU tensor that doesn't
* require vertical concatenation.
*/
void get_num_elements_gru_no_vconcat_weights() {
uint32_t shape[] = {2, 3, 4};
for (int i = 0; i < NUM_NO_VCONCAT_INFOS; i++) {
test_num_elements_concat(ZDNN_3DS, RNN_TYPE_GRU | no_vconcat_infos[i],
shape, 24, 72, 1152);
}
}
/*
* Test to ensure get_num_elements works with a 3DS GRU tensor that requires
* vertical concatenation.
*/
void get_num_elements_gru_prev_bidir_weights() {
uint32_t shape[] = {2, 6, 4};
test_num_elements_concat(ZDNN_3DS,
RNN_TYPE_GRU | PREV_LAYER_BIDIR | USAGE_WEIGHTS,
shape, 48, 144, 49152);
}
/*
* Test to ensure get_num_elements works with a (hidden-)biases 2DS GRU
* tensor.
*/
void get_num_elements_gru_biases() {
uint32_t shape[] = {2, 3};
for (int i = 0; i < NUM_PREV_LAYERS; i++) {
for (int j = 0; j < NUM_BIASES_USAGES; j++) {
test_num_elements_concat(ZDNN_2DS,
RNN_TYPE_GRU | prev_layers[i] | biases_usages[j],
shape, 6, 18, 384);
}
}
}
/*
* Test to ensure get_num_elements works with an RNN uni output tensor, which
* the ELEMENTS_AIU result will not have any padding
*/
void get_num_elements_uni_output() {
uint32_t shape[] = {2, 1, 3, 4};
test_num_elements(ZDNN_4DS, shape, 24, 24);
}
/*
* Test to ensure get_num_elements works with an RNN bidir output tensor, which
* the ELEMENTS_AIU result WILL have paddings
*/
void get_num_elements_bidir_output() {
uint32_t shape[] = {2, 2, 3, 4};
test_num_elements(ZDNN_4DS, shape, 48, 768);
}
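/*
 * Illustrative arithmetic for the bidir case above, assuming 64 cells per
 * stick: each direction's slice of dim1 is padded to a full stick, so
 * {2, 2, 3, 4} transforms to {2, 1, 3, 2 * 64} = {2, 1, 3, 128} and
 * ELEMENTS_AIU = 2 * 1 * 3 * 128 = 768, while ELEMENTS_PRE stays at
 * 2 * 2 * 3 * 4 = 48.
 */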
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_nhwc);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_4d);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_3ds);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_3d);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_2ds);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_2d);
RUN_TEST_ALL_PRE_DATATYPES(get_num_elements_1d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
get_num_elements_lstm_no_vconcat_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
get_num_elements_lstm_prev_bidir_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_lstm_biases);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_gru_no_vconcat_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_gru_prev_bidir_weights);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_gru_biases);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_uni_output);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(get_num_elements_bidir_output);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_version.c 0000664 0000000 0000000 00000020763 15000221702 0017527 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include "version.h"
#include <stdio.h>
#define MAJOR_NEWER(x) (x + 0x00020000)
#define MAJOR_OLDER(x) (x - 0x00020000)
#define MINOR_NEWER(x) (x + 0x00000200)
#define MINOR_OLDER(x) (x - 0x00000200)
#define PATCH_NEWER(x) (x + 0x00000002)
#define PATCH_OLDER(x) (x - 0x00000002)
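/*
 * Illustrative sketch: the offsets above imply ZDNN_VERNUM packs the version
 * as 0x00MMmmpp (major in bits 16-23, minor in bits 8-15, patch in bits 0-7),
 * e.g. 5.5.5 -> 0x00050505, so MAJOR_NEWER() bumps the major field by 2,
 * MINOR_NEWER() the minor field by 2, and so on. This packing is assumed here
 * from those offsets; version.h is authoritative.
 */
static inline uint32_t example_pack_vernum(uint32_t major, uint32_t minor,
                                           uint32_t patch) {
  return (major << 16) | (minor << 8) | patch;
}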
void setUp(void) {
#ifndef VERSION_C_TEST
TEST_IGNORE_MESSAGE("VERSION_C_TEST required for test. Skipping.");
#endif
aiu_lib_vernum = AIU_UNKNOWN;
}
void tearDown(void) {}
// ***************************************************
// Under VERSION_C_TEST library version is always: 5.5.5
// ***************************************************
void test_version_runnable(uint32_t app_vernum, uint32_t new_aiu_lib_vernum,
bool exp_result) {
aiu_lib_vernum = new_aiu_lib_vernum;
TEST_ASSERT_MESSAGE_FORMATTED(
zdnn_is_version_runnable(app_vernum) == exp_result,
"zdnn_is_version_runnable() did not return %d", exp_result);
}
// ************************
// *** MAJOR ver tests
// ************************
// ---------------------------------------------------------
// | app | hw | library | runnable?
// ---------------------------------------------------------
// | 5.5.5 | 7.x.x | 5.5.5 | no
// | 7.x.x | 5.5.5 | 5.5.5 | no
// | 7.x.x | 7.x.x | 5.5.5 | no
// | 5.3.x | 5.5.x | 5.5.5 | yes
// ---------------------------------------------------------
void hw_major_newer_fail() {
test_version_runnable(ZDNN_VERNUM, MAJOR_NEWER(ZDNN_VERNUM), false);
}
void app_major_newer_fail() {
test_version_runnable(MAJOR_NEWER(ZDNN_VERNUM), ZDNN_VERNUM, false);
}
void lib_major_older_fail() {
test_version_runnable(MAJOR_NEWER(ZDNN_VERNUM), MAJOR_NEWER(ZDNN_VERNUM),
false);
}
void major_all_match_pass() {
test_version_runnable(MINOR_OLDER(ZDNN_VERNUM), ZDNN_VERNUM, true);
}
// ************************
// *** MINOR ver tests
// ************************
// ---------------------------------------------------------
// | app | hw | library | runnable?
// ---------------------------------------------------------
// | 5.7.5 | 5.5.5 | 5.5.5 | no
// | 5.3.5 | 5.5.5 | 5.5.5 | yes
// | 5.5.5 | 5.7.5 | 5.5.5 | yes
// | 5.5.5 | 5.3.5 | 5.5.5 | no
// | 5.3.5 | 5.3.5 | 5.5.5 | yes
// | 5.7.5 | 5.7.5 | 5.5.5 | no
// ---------------------------------------------------------
// | 5.3.5 | 5.7.5 | 5.5.5 | yes
// | 5.1.5 | 5.3.5 | 5.5.5 | yes
// | 5.3.5 | 5.1.5 | 5.5.5 | no
// ---------------------------------------------------------
void app_minor_newer_fail() {
test_version_runnable(MINOR_NEWER(ZDNN_VERNUM), ZDNN_VERNUM, false);
}
void app_minor_older_pass() {
test_version_runnable(MINOR_OLDER(ZDNN_VERNUM), ZDNN_VERNUM, true);
}
void hw_minor_newer_pass() {
test_version_runnable(ZDNN_VERNUM, MINOR_NEWER(ZDNN_VERNUM), true);
}
void hw_minor_older_fail() {
test_version_runnable(ZDNN_VERNUM, MINOR_OLDER(ZDNN_VERNUM), false);
}
void lib_minor_newer_pass() {
test_version_runnable(MINOR_OLDER(ZDNN_VERNUM), MINOR_OLDER(ZDNN_VERNUM),
true);
}
void lib_minor_older_fail() {
test_version_runnable(MINOR_NEWER(ZDNN_VERNUM), MINOR_NEWER(ZDNN_VERNUM),
false);
}
void app_minor_older_hw_minor_newer_pass() {
test_version_runnable(MINOR_OLDER(ZDNN_VERNUM), MINOR_NEWER(ZDNN_VERNUM),
true);
}
void app_minor_older_hw_minor_even_older_fail() {
test_version_runnable(MINOR_OLDER(ZDNN_VERNUM),
MINOR_OLDER(MINOR_OLDER(ZDNN_VERNUM)), false);
}
// ************************
// *** Mixed MAJOR/MINOR ver tests
// ************************
// all of these are the runnable = yes cases in MINOR ver tests but now with
// different MAJOR ver, so they all become runnable = no
// ---------------------------------------------------------
// | app | hw | library | runnable?
// ---------------------------------------------------------
// | 7.3.5 | 5.5.5 | 5.5.5 | no
// | 5.5.5 | 7.7.5 | 5.5.5 | no
// | 3.3.5 | 7.3.5 | 5.5.5 | no
// | 7.3.5 | 3.7.5 | 5.5.5 | no
// | 5.1.5 | 3.3.5 | 5.5.5 | no
// ---------------------------------------------------------
void mixed_app_major_newer_fail() {
test_version_runnable(MAJOR_NEWER(MINOR_OLDER(ZDNN_VERNUM)), ZDNN_VERNUM,
false);
}
void mixed_hw_major_newer_fail() {
test_version_runnable(ZDNN_VERNUM, MAJOR_NEWER(MINOR_NEWER(ZDNN_VERNUM)),
false);
}
void mixed_app_major_older_hw_major_newer_fail() {
test_version_runnable(MAJOR_OLDER(MINOR_OLDER(ZDNN_VERNUM)),
MAJOR_NEWER(MINOR_OLDER(ZDNN_VERNUM)), false);
}
void mixed_app_major_newer_hw_major_older_fail() {
test_version_runnable(MAJOR_NEWER(MINOR_OLDER(ZDNN_VERNUM)),
MAJOR_OLDER(MINOR_NEWER(ZDNN_VERNUM)), false);
}
void mixed_hw_major_older_fail() {
test_version_runnable(MINOR_OLDER(MINOR_OLDER(ZDNN_VERNUM)),
MAJOR_OLDER(MINOR_OLDER(ZDNN_VERNUM)), false);
}
// ************************
// *** PATCH ver tests
// ************************
// Everything passes
void app_patch_newer_pass() {
test_version_runnable(PATCH_NEWER(ZDNN_VERNUM), ZDNN_VERNUM, true);
}
void app_patch_older_pass() {
test_version_runnable(PATCH_OLDER(ZDNN_VERNUM), ZDNN_VERNUM, true);
}
void hw_patch_newer_pass() {
test_version_runnable(ZDNN_VERNUM, PATCH_NEWER(ZDNN_VERNUM), true);
}
void hw_patch_older_pass() {
test_version_runnable(ZDNN_VERNUM, PATCH_OLDER(ZDNN_VERNUM), true);
}
void lib_patch_newer_pass() {
test_version_runnable(PATCH_OLDER(ZDNN_VERNUM), PATCH_OLDER(ZDNN_VERNUM),
true);
}
void lib_patch_older_pass() {
test_version_runnable(PATCH_NEWER(ZDNN_VERNUM), PATCH_NEWER(ZDNN_VERNUM),
true);
}
// ************************
// *** get_max_runnable tests
// ************************
void test_get_max_runnable(uint32_t exp_vernum) {
uint32_t vernum = zdnn_get_max_runnable_version();
TEST_ASSERT_MESSAGE_FORMATTED(
vernum == exp_vernum,
"zdnn_get_max_runnable_version() did not return %08x (found: %08x)",
exp_vernum, vernum);
}
void test_max_ver_hw_major_newer() {
aiu_lib_vernum = MAJOR_NEWER(ZDNN_VERNUM);
test_get_max_runnable(AIU_UNKNOWN);
}
void test_max_ver_hw_major_older() {
aiu_lib_vernum = MAJOR_OLDER(ZDNN_VERNUM);
test_get_max_runnable(AIU_UNKNOWN);
}
void test_max_ver_hw_minor_newer() {
aiu_lib_vernum = MINOR_NEWER(ZDNN_VERNUM);
test_get_max_runnable(ZDNN_VERNUM | 0xFF);
}
void test_max_ver_hw_minor_older() {
aiu_lib_vernum = MINOR_OLDER(ZDNN_VERNUM);
test_get_max_runnable(MINOR_OLDER(ZDNN_VERNUM) | 0xFF);
}
void test_max_ver_hw_patch_newer() {
  aiu_lib_vernum = PATCH_NEWER(ZDNN_VERNUM);
test_get_max_runnable(ZDNN_VERNUM | 0xFF);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(hw_major_newer_fail);
RUN_TEST(app_major_newer_fail);
RUN_TEST(lib_major_older_fail);
RUN_TEST(major_all_match_pass);
RUN_TEST(app_minor_newer_fail);
RUN_TEST(app_minor_older_pass);
RUN_TEST(hw_minor_newer_pass);
RUN_TEST(hw_minor_older_fail);
RUN_TEST(lib_minor_newer_pass);
RUN_TEST(lib_minor_older_fail);
RUN_TEST(app_minor_older_hw_minor_newer_pass);
  RUN_TEST(app_minor_older_hw_minor_even_older_fail);
RUN_TEST(mixed_app_major_newer_fail);
RUN_TEST(mixed_hw_major_newer_fail);
RUN_TEST(mixed_app_major_older_hw_major_newer_fail);
RUN_TEST(mixed_app_major_newer_hw_major_older_fail);
RUN_TEST(mixed_hw_major_older_fail);
RUN_TEST(app_patch_newer_pass);
RUN_TEST(app_patch_older_pass);
RUN_TEST(hw_patch_newer_pass);
RUN_TEST(hw_patch_older_pass);
RUN_TEST(lib_patch_newer_pass);
RUN_TEST(lib_patch_older_pass);
RUN_TEST(test_max_ver_hw_major_newer);
RUN_TEST(test_max_ver_hw_major_older);
RUN_TEST(test_max_ver_hw_minor_newer);
RUN_TEST(test_max_ver_hw_minor_older);
RUN_TEST(test_max_ver_hw_patch_newer);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_version_detect.c 0000664 0000000 0000000 00000020033 15000221702 0021045 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include "version.h"
#include <string.h>
// magic-numbering these to check against what's in version.h
#define LIB_VERNUM_Z16 0x00010000
#define LIB_VERNUM_NEWER_MAJOR LIB_VERNUM(7, 5, 5)
#define LIB_VERNUM_NEWER_MINOR LIB_VERNUM(5, 7, 5)
#define LIB_VERNUM_BASELINE LIB_VERNUM(5, 5, 5)
#define LIB_VERNUM_OLDER_MINOR LIB_VERNUM(5, 3, 5)
#define LIB_VERNUM_OLDER_MAJOR LIB_VERNUM(3, 5, 5)
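// Worked example of the packing these macros assume: with the 0x00MMmmpp
// layout implied by LIB_VERNUM_Z16 == 0x00010000 (i.e. 1.0.0) and the
// 0x00010100 (1.1.0) value checked in test_lib_vernum_nnpa() below,
// LIB_VERNUM(5, 5, 5) would expand to 0x00050505 and LIB_VERNUM(7, 5, 5) to
// 0x00070505, so the five fake machine levels order naturally by numeric
// value.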
// newer major: newer minor + mdis bump
aiu_hwinfo aiu_hwinfo_newer_major = {
{0x00, 0x11, 0x11, 0x11}, {0x00, 0x01}, 7, 5, {0x00, 0x11}, "newer major",
LIB_VERNUM_NEWER_MAJOR};
// newer minor: baseline + blk1 2nd byte bit bump + blk2 2nd byte bit bump
aiu_hwinfo aiu_hwinfo_newer_minor = {
{0x00, 0x11, 0x11, 0x11}, {0x00, 0x01}, 5, 5, {0x00, 0x11}, "newer minor",
LIB_VERNUM_NEWER_MINOR};
aiu_hwinfo aiu_hwinfo_baseline = {
{0x00, 0x01, 0x11, 0x11}, {0x00, 0x00}, 5, 5, {0x00, 0x11}, "baseline",
LIB_VERNUM_BASELINE};
// older minor: baseline - blk3 2nd byte bit nerf
aiu_hwinfo aiu_hwinfo_older_minor = {
{0x00, 0x01, 0x11, 0x11}, {0x00, 0x00}, 5, 5, {0x00, 0x10}, "older minor",
LIB_VERNUM_OLDER_MINOR};
// older major: older minor - blk1 3rd byte bit nerf - mts nerf
aiu_hwinfo aiu_hwinfo_older_major = {
{0x00, 0x01, 0x10, 0x11}, {0x00, 0x00}, 5, 3, {0x00, 0x10}, "older major",
LIB_VERNUM_OLDER_MAJOR};
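// How these fake machines are expected to be matched (as exercised by the
// tests below): aiu_hwinfo_list[] is populated newest-first in main(), and
// refresh_aiu_lib_vernum() appears to settle on the newest entry whose
// capability blocks and values are all satisfied by the QAF result, falling
// back to AIU_UNKNOWN when even the oldest entry is not met -- see
// test_exceeds_newer_minor_but_not_newer_major and
// test_all_flags_on_but_older_vals.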
void reset_qaf_result() {
// QAF now has the information of the baseline machine
memset(&nnpa_query_result, 0, sizeof(nnpa_qaf_parameter_block));
memcpy(QAF_BLK1_PTR, &aiu_hwinfo_baseline.blk1, HWINFO_BLK1_LEN);
memcpy(QAF_BLK2_PTR, &aiu_hwinfo_baseline.blk2, HWINFO_BLK2_LEN);
QAF_VAL1 = aiu_hwinfo_baseline.val1;
QAF_VAL2 = aiu_hwinfo_baseline.val2;
memcpy(QAF_BLK3_PTR, &aiu_hwinfo_baseline.blk3, HWINFO_BLK3_LEN);
}
void setUp(void) {}
void tearDown(void) {}
// ************************
// *** LIB_VERNUM tests
// ************************
void test_lib_vernum_nnpa() {
VERIFY_HW_ENV; // verify required HW env is available.
refresh_aiu_lib_vernum();
uint32_t expected_lib_vernum;
if (zdnn_is_nnpa_parmblk_fmt_installed(1, NNPA_PARMBLKFORMAT_1) == true) {
expected_lib_vernum = 0x00010100;
} else {
expected_lib_vernum = 0x00010000;
}
TEST_ASSERT_MESSAGE_FORMATTED(aiu_lib_vernum == expected_lib_vernum,
"aiu_lib_vernum is not detected as %08" PRIx32,
expected_lib_vernum);
}
// **************************************************
// *** LIB_VERNUM detection tests - Fake machines
// **************************************************
void test_baseline_exact() {
reset_qaf_result();
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_BASELINE,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_BASELINE, aiu_lib_vernum);
}
void test_newer_minor_exact() {
reset_qaf_result();
*((char *)QAF_BLK1_PTR + 1) = 0x11;
*((char *)QAF_BLK2_PTR + 1) = 0x01;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_NEWER_MINOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_NEWER_MINOR, aiu_lib_vernum);
}
void test_newer_major_exact() {
reset_qaf_result();
*((char *)QAF_BLK1_PTR + 1) = 0x11;
*((char *)QAF_BLK2_PTR + 1) = 0x01;
QAF_VAL1 = 7;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_NEWER_MAJOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_NEWER_MAJOR, aiu_lib_vernum);
}
void test_older_minor_exact() {
reset_qaf_result();
*((char *)QAF_BLK3_PTR + 1) = 0x10;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_OLDER_MINOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_OLDER_MINOR, aiu_lib_vernum);
}
void test_older_major_exact() {
reset_qaf_result();
*((char *)QAF_BLK1_PTR + 2) = 0x10;
*((char *)QAF_BLK3_PTR + 1) = 0x10;
QAF_VAL2 = 3;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_OLDER_MAJOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_OLDER_MAJOR, aiu_lib_vernum);
}
void test_exceeds_newer_minor_but_not_newer_major() {
// turn on all bits, leave val1 and val2 at 5 and 5
memset(&nnpa_query_result, 0xff, sizeof(nnpa_qaf_parameter_block));
QAF_VAL1 = 5;
QAF_VAL2 = 5;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_NEWER_MINOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_NEWER_MINOR, aiu_lib_vernum);
}
void test_older_minor_enough_but_not_baseline() {
reset_qaf_result();
*((char *)QAF_BLK1_PTR) = 0xFF; // better blk1 than baseline
*((char *)QAF_BLK3_PTR + 1) = 0x10; // worse blk3 than baseline
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_OLDER_MINOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_OLDER_MINOR, aiu_lib_vernum);
}
void test_all_flags_on_but_older_vals() {
// turn on all bits, set val1 and val2 at 3 and 3 so they are worse than older
// major
memset(&nnpa_query_result, 0xff, sizeof(nnpa_qaf_parameter_block));
QAF_VAL1 = 3;
QAF_VAL2 = 3;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == AIU_UNKNOWN,
"aiu_lib_vernum is not detected as %08x (found: %08x)", AIU_UNKNOWN,
aiu_lib_vernum);
}
void test_super_mythical() {
// turn on all bits, set val1 and val2 at 100, 100 so it exceeds newer major
memset(&nnpa_query_result, 0xff, sizeof(nnpa_qaf_parameter_block));
QAF_VAL1 = 100;
QAF_VAL2 = 100;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == LIB_VERNUM_NEWER_MAJOR,
"aiu_lib_vernum is not detected as %08x (found: %08x)",
LIB_VERNUM_NEWER_MAJOR, aiu_lib_vernum);
}
void test_super_old1() {
// even fewer bits on than older major
memset(&nnpa_query_result, 0x00, sizeof(nnpa_qaf_parameter_block));
*((char *)QAF_BLK3_PTR + 1) = 18;
QAF_VAL1 = aiu_hwinfo_baseline.val1;
QAF_VAL2 = aiu_hwinfo_baseline.val2;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == AIU_UNKNOWN,
"aiu_lib_vernum is not detected as %08x (found: %08x)", AIU_UNKNOWN,
aiu_lib_vernum);
}
void test_super_old2() {
// even lower val1 than older major
reset_qaf_result();
QAF_VAL1 = 2;
refresh_aiu_lib_vernum();
TEST_ASSERT_MESSAGE_FORMATTED(
aiu_lib_vernum == AIU_UNKNOWN,
"aiu_lib_vernum is not detected as %08x (found: %08x)", AIU_UNKNOWN,
aiu_lib_vernum);
}
int main(void) {
UNITY_BEGIN();
RUN_TEST(test_lib_vernum_nnpa);
  // only tests with fake machines from this point forward
aiu_hwinfo_list[0] = &aiu_hwinfo_newer_major;
aiu_hwinfo_list[1] = &aiu_hwinfo_newer_minor;
aiu_hwinfo_list[2] = &aiu_hwinfo_baseline;
aiu_hwinfo_list[3] = &aiu_hwinfo_older_minor;
aiu_hwinfo_list[4] = &aiu_hwinfo_older_major;
RUN_TEST(test_baseline_exact);
RUN_TEST(test_newer_minor_exact);
RUN_TEST(test_newer_major_exact);
RUN_TEST(test_older_minor_exact);
RUN_TEST(test_older_major_exact);
RUN_TEST(test_exceeds_newer_minor_but_not_newer_major);
RUN_TEST(test_older_minor_enough_but_not_baseline);
RUN_TEST(test_all_flags_on_but_older_vals);
RUN_TEST(test_super_mythical);
RUN_TEST(test_super_old1);
RUN_TEST(test_super_old2);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_add_elwise.c 0000664 0000000 0000000 00000015006 15000221702 0021165 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full add api.
*/
void api_add_basic() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {1, 2, 2, 2};
/* Input 1 values as NHWC
[[
[[1, 10], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
float input1_values[] = {1, 10, 2, 20, 4, 40, 5, 50};
/* Input 2 values as NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
float input2_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Expected values as NHWC (test method will generate this array)
[[
[[4, 40], [8, 80]],
[[12, 120], [14, 140]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
// test to drive input tensors with 320 values in their buffer
void api_add_med_dims() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {1, 8, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
// test to drive input tensors with 6825 values in their buffer
void api_add_high_dims() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
/*
 * Simple test to drive a full add api using the test data type
* and 3 dimensional tensors
*/
void api_add_3D() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {2, 2, 2};
/* Input 1 values as NHWC
[[
[[1, 10], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
float input1_values[] = {1, 10, 2, 20, 4, 40, 5, 50};
/* Input 2 values as NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
float input2_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Expected values as NHWC (test method will generate this array)
[[
[[4, 40], [8, 80]],
[[12, 120], [14, 140]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
/*
 * Simple test to drive a full add api using the test data type
* and 2 dimensional tensors
*/
void api_add_2D() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {2, 2};
/* Input 1 values as NHWC
[[
[[1, 10], [2, 20]]
]]
*/
float input1_values[] = {1, 10, 2, 20};
/* Input 2 values as NHWC
[[
[[3, 30], [6, 60]]
]]
*/
float input2_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Expected values as NHWC (test method will generate this array)
[[
[[4, 40], [8, 80]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
/*
 * Simple test to drive a full add api using the test data type
* and 1 dimensional tensors
*/
void api_add_1D() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {2};
/* Input 1 values as NHWC
[[
[[10000, 12000]]
]]
*/
float input1_values[] = {10000, 12000};
/* Input 2 values as NHWC
[[
[[860, 1400]]
]]
*/
float input2_values[] = {860, 1400};
/* Expected values as NHWC (test method will generate this array)
[[
[[10860, 13400]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_ADD, ZDNN_OK);
}
/*
* Simple test to drive a full add api that hits an overflow.
*/
void api_add_overflow() {
// Input and outputs expect the same shape so just define it once
uint32_t shape[] = {1, 2, 2, 2};
/* Input 1 values as NHWC
[[
[[1, 10], [MAX_DLF16 * 0.75, 20]],
[[4, 40], [5, 50]]
]]
*/
float input1_values[] = {1, 10, MAX_DLF16 * 0.75, 20, 4, 40, 5, 50};
/* Input 2 values as NHWC
[[
  [[3, 30], [MAX_DLF16 * 0.75 + 1.0, 60]],
[[8, 80], [9, 90]]
]]
*/
float input2_values[] = {3, 30, MAX_DLF16 * 0.75 + 1.0, 60, 8, 80, 9, 90};
/* Expected values as NHWC (test method will generate this array)
[[
[[4, 40], [OVERFLOW, 80]],
[[12, 120], [14, 140]]
]]
*/
// when overflow/underflow happens, zAIU sets range violation flag
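  // Rough arithmetic: (MAX_DLF16 * 0.75) + (MAX_DLF16 * 0.75 + 1.0) is roughly
  // 1.5 * MAX_DLF16, which is outside the DLFLOAT16 range, hence the expected
  // ZDNN_ELEMENT_RANGE_VIOLATION for both the FP32 and BFLOAT runs below.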
test_elwise_api_2_inputs_adv(shape, ZDNN_NHWC, FP32, input1_values,
input2_values, NNPA_ADD,
ZDNN_ELEMENT_RANGE_VIOLATION);
test_elwise_api_2_inputs_adv(shape, ZDNN_NHWC, BFLOAT, input1_values,
input2_values, NNPA_ADD,
ZDNN_ELEMENT_RANGE_VIOLATION);
// Note: We can't create an add/sub overflow/underflow with values that
// originate as FP16s, since FP16's max is way below the DLFloat max.
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_add_1D);
RUN_TEST(api_add_overflow);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_avgpool2d_maxpool2d_pool.c 0000664 0000000 0000000 00000077635 15000221702 0024020 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_pool.h"
void setUp(void) {
// note: maxpool2d is actually OK with default tolerance values, but avgpool2d
// needs custom tolerance
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
}
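// (Presumed rationale for the fp32 scaling above: DLFLOAT16 keeps roughly 14
// fewer fraction bits than FP32, so one DLFLOAT16 ULP spans about
// 2^14 = 16384 FP32 ULPs, which is why the 64-ULP budget used for the 16-bit
// types is multiplied by 16384 for FP32.)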
void tearDown(void) {}
/*
* Simple test of basic pool with non-zero strides and SAME_PADDING
*/
void maxpool2d_same_basic() {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
// Input pooling arguments
zdnn_pool_padding padding_type = SAME_PADDING;
uint32_t kernel_height = 2;
uint32_t kernel_width = 2;
uint32_t stride_height = 2;
uint32_t stride_width = 2;
/* Visualization of expected values
[[
[[5, 50], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
uint32_t output_shape[] = {1, 2, 2, 2};
float expected_values[] = {5, 50, 6, 60, 8, 80, 9, 90};
test_pool_function(NNPA_MAXPOOL2D, input_shape, layout, false, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, false,
expected_values);
}
/*
* Simple test of basic pool with non-zero strides and VALID_PADDING
*/
void maxpool2d_valid_basic() {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = 2;
uint32_t kernel_width = 2;
uint32_t stride_height = 2;
uint32_t stride_width = 2;
/* Visualization of expected values
[[
[[5, 50]],
]]
*/
uint32_t output_shape[] = {1, 1, 1, 2};
float expected_values[] = {5.0, 50.0};
test_pool_function(NNPA_MAXPOOL2D, input_shape, layout, false, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, false,
expected_values);
}
/*
* Simple test of basic pool with non-zero strides and SAME_PADDING
*/
void avgpool2d_same_basic() {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
// Input pooling arguments
zdnn_pool_padding padding_type = SAME_PADDING;
uint32_t kernel_height = 2;
uint32_t kernel_width = 2;
uint32_t stride_height = 2;
uint32_t stride_width = 2;
/* Visualization of expected values
[[
[[ 3, 30], [ 4.5, 45]],
[[ 7.5, 75], [ 9, 90]]
]]
*/
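  // Worked windows for the SAME-padding average above (2x2 kernel, stride 2,
  // with padded positions apparently left out of the divisor), channel 0:
  // (1+2+4+5)/4 = 3, (3+6)/2 = 4.5, (7+8)/2 = 7.5 and 9/1 = 9, matching
  // expected_values[].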
uint32_t output_shape[] = {1, 2, 2, 2};
float expected_values[] = {3.0, 30.0, 4.5, 45, 7.5, 75, 9, 90};
test_pool_function(NNPA_AVGPOOL2D, input_shape, layout, false, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, false,
expected_values);
}
/*
* Simple test of basic pool with non-zero strides and VALID_PADDING
*/
void avgpool2d_valid_basic() {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = 2;
uint32_t kernel_width = 2;
uint32_t stride_height = 2;
uint32_t stride_width = 2;
/* Visualization of expected values
[[
[[3, 30]],
]]
*/
uint32_t output_shape[] = {1, 1, 1, 2};
float expected_values[] = {3.0, 30.0};
test_pool_function(NNPA_AVGPOOL2D, input_shape, layout, false, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, false,
expected_values);
}
/*
* Simple test of basic pool with zero strides
*/
void zero_strides(nnpa_function_code function_code) {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = 3;
uint32_t kernel_width = 3;
uint32_t stride_height = 0;
uint32_t stride_width = 0;
/* Visualization of expected values
[[
[[9, 90]]
]]
*/
uint32_t output_shape[] = {1, 1, 1, 2};
/* Visualization of MAXPOOL2D expected values
[[
[[9, 90]]
]]
*/
/* Visualization of AVGPOOL2D expected values
[[
[[5, 50]]
]]
*/
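  // Quick check of the two expectations above: the 3x3 kernel covers the whole
  // input, so MAXPOOL2D keeps the largest element per channel (9 and 90) while
  // AVGPOOL2D takes the per-channel mean, (1+2+...+9)/9 = 5 and
  // (10+20+...+90)/9 = 50.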
float expected_values[] = {0, 0};
if (function_code == NNPA_MAXPOOL2D) {
expected_values[0] = 9;
expected_values[1] = 90;
} else {
expected_values[0] = 5;
expected_values[1] = 50;
}
test_pool_function(function_code, input_shape, layout, false, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, false,
expected_values);
}
void maxpool2d_zero_strides() { zero_strides(NNPA_MAXPOOL2D); }
void avgpool2d_zero_strides() { zero_strides(NNPA_AVGPOOL2D); }
/*
 * Check that we hit the expected condition code when using an unexpected padding
* type.
*/
void unexpected_padding_fail(nnpa_function_code function_code) {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {1, 3, 3, 2};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
// Set this to the first unused padding type. Then if a new one is
// supported, this should fail and we remember to update our code and
// documentation.
zdnn_pool_padding padding_type = 2;
uint32_t kernel_height = 1;
uint32_t kernel_width = 1;
uint32_t stride_height = 1;
uint32_t stride_width = 1;
// kernel and strides of 1 should basically copy the input (if the padding
// type was valid)
uint32_t *output_shape = input_shape;
float *expected_values = input_values;
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_FUNC_RC_F000,
true, expected_values);
}
void maxpool2d_unexpected_padding_fail() {
unexpected_padding_fail(NNPA_MAXPOOL2D);
}
void avgpool2d_unexpected_padding_fail() {
unexpected_padding_fail(NNPA_AVGPOOL2D);
}
/*
* Check that we don't hit a condition code when using 0 strides and the
* largest kernel size.
*/
void zero_strides_max_kernel_dims_pass(nnpa_function_code function_code) {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {1, MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE,
MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 0;
uint32_t stride_width = 0;
uint32_t output_shape[] = {1, 1, 1, 1};
// Since all input values are the same, they should average to the same.
float *expected_values = input_values;
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, true,
expected_values);
}
void maxpool2d_zero_strides_max_kernel_dims_pass() {
zero_strides_max_kernel_dims_pass(NNPA_MAXPOOL2D);
}
void avgpool2d_zero_strides_max_kernel_dims_pass() {
zero_strides_max_kernel_dims_pass(NNPA_AVGPOOL2D);
}
/*
 * Check that we hit the expected condition code when using 0 strides and a
 * kernel size over the largest allowed.
*/
void zero_strides_max_kernel_height_fail(nnpa_function_code function_code) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_kernel_max is a valid tensor dimension size but is too large for a
// kernel. This should lead to a condition code from the NNPA. If not,
// update the test constant and the API documentation to the new value.
uint32_t over_kernel_max = MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, over_kernel_max, 5, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 0;
uint32_t stride_width = 0;
uint32_t output_shape[] = {1, 1, 1, 1};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_FUNC_RC_F001,
true, expected_values);
}
void maxpool2d_zero_strides_max_kernel_height_fail() {
zero_strides_max_kernel_height_fail(NNPA_MAXPOOL2D);
}
void avgpool2d_zero_strides_max_kernel_height_fail() {
zero_strides_max_kernel_height_fail(NNPA_AVGPOOL2D);
}
/*
 * Check that we hit the expected condition code when using 0 strides and a
 * kernel size over the largest allowed.
*/
void zero_strides_max_kernel_width_fail(nnpa_function_code function_code) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_kernel_max is a valid tensor dimension size but is too large for a
// kernel. This should lead to a condition code from the NNPA. If not,
// update the test constant and the API documentation to the new value.
uint32_t over_kernel_max = MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, 8, over_kernel_max, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
zdnn_pool_padding padding_type = VALID_PADDING;
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 0;
uint32_t stride_width = 0;
uint32_t output_shape[] = {1, 1, 1, 1};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_FUNC_RC_F001,
true, expected_values);
}
void maxpool2d_zero_strides_max_kernel_width_fail() {
zero_strides_max_kernel_width_fail(NNPA_MAXPOOL2D);
}
void avgpool2d_zero_strides_max_kernel_width_fail() {
zero_strides_max_kernel_width_fail(NNPA_AVGPOOL2D);
}
/*
* Check that we don't hit a condition code when using nonzero strides and the
* largest kernel size.
*/
void max_kernel_pass(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {1, MAXIMUM_POOL_NONZERO_STRIDES_KERNEL_SIZE,
MAXIMUM_POOL_NONZERO_STRIDES_KERNEL_SIZE, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 1;
uint32_t stride_width = 1;
uint32_t output_shape[] = {1, 1, 1, 1};
// Since all input values are the same, they should average to the same.
float *expected_values = input_values;
// use input_shape[] as output shape if SAME_PADDING since stride
// height/width are 1
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width,
padding_type == SAME_PADDING ? input_shape : output_shape,
layout, ZDNN_OK, true, expected_values);
}
void maxpool2d_max_kernel_valid_padding_pass() {
max_kernel_pass(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_kernel_same_padding_pass() {
max_kernel_pass(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_kernel_valid_padding_pass() {
max_kernel_pass(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_kernel_same_padding_pass() {
max_kernel_pass(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
 * Check that we hit the expected condition code when using nonzero strides and
 * a kernel size over the largest allowed.
*/
void max_kernel_height_fail(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_kernel_max is a valid tensor dimension size but is too large for a
// kernel. This should lead to a condition code from the NNPA. If not,
// update the test constant and the API documentation to the new value.
uint32_t over_kernel_max = MAXIMUM_POOL_NONZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, over_kernel_max, 5, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 1;
uint32_t stride_width = 1;
uint32_t output_shape[] = {1, 1, 1, 1};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
// use input_shape[] as output shape if SAME_PADDING since stride
// height/width are 1
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width,
padding_type == SAME_PADDING ? input_shape : output_shape,
layout, ZDNN_FUNC_RC_F002, true, expected_values);
}
void maxpool2d_max_kernel_valid_padding_height_fail() {
max_kernel_height_fail(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_kernel_same_padding_height_fail() {
max_kernel_height_fail(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_kernel_valid_padding_height_fail() {
max_kernel_height_fail(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_kernel_same_padding_height_fail() {
max_kernel_height_fail(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
 * Check that we hit the expected condition code when using nonzero strides and
 * a kernel size over the largest allowed.
*/
void max_kernel_width_fail(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_kernel_max is a valid tensor dimension size but is too large for a
// kernel. This should lead to a condition code from the NNPA. If not,
// update the test constant and the API documentation to the new value.
uint32_t over_kernel_max = MAXIMUM_POOL_NONZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, 8, over_kernel_max, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1];
uint32_t kernel_width = input_shape[2];
uint32_t stride_height = 1;
uint32_t stride_width = 1;
uint32_t output_shape[] = {1, 1, 1, 1};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
// use input_shape[] as output shape if SAME_PADDING since stride
// height/width are 1
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width,
padding_type == SAME_PADDING ? input_shape : output_shape,
layout, ZDNN_FUNC_RC_F002, true, expected_values);
}
void maxpool2d_max_kernel_valid_padding_width_fail() {
max_kernel_width_fail(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_kernel_same_padding_width_fail() {
max_kernel_width_fail(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_kernel_valid_padding_width_fail() {
max_kernel_width_fail(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_kernel_same_padding_width_fail() {
max_kernel_width_fail(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
* Check that we don't hit a condition code when using nonzero strides and the
* largest stride size.
*/
void max_stride_pass(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {1, 2 * MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE,
2 * MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1] / 2;
uint32_t kernel_width = input_shape[2] / 2;
uint32_t stride_height = input_shape[1] / 2;
uint32_t stride_width = input_shape[2] / 2;
// With stride and kernel set to exactly 1/2 of input, we'd expect output to
// end with a height and width of exactly 2.
  // These dimensions work for both VALID_PADDING and SAME_PADDING
uint32_t output_shape[] = {1, 2, 2, 1};
// Since all input values are the same, they should average to the same.
float expected_values[] = {input_values[0], input_values[0], input_values[0],
input_values[0]};
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_OK, true,
expected_values);
}
void maxpool2d_max_stride_valid_padding_pass() {
max_stride_pass(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_stride_same_padding_pass() {
max_stride_pass(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_stride_valid_padding_pass() {
max_stride_pass(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_stride_same_padding_pass() {
max_stride_pass(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
* Check that we hit the expected condition code when using just over the
* largest nonzero strides allowed
*/
void max_stride_height_fail(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_stride_max is a valid tensor dimension size but is too large for a
// stride. This should lead to a condition code from the zAIU. If not, update
// the test constant and the API documentation to the new value.
uint32_t over_stride_max = MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE + 1;
  // Use 2 * X here to make determining expected shape and values easier.
uint32_t input_shape[] = {1, 2 * over_stride_max,
2 * MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1] / 2;
uint32_t kernel_width = input_shape[2] / 2;
uint32_t stride_height = input_shape[1] / 2;
uint32_t stride_width = input_shape[2] / 2;
// With stride and kernel set to exactly 1/2 of input, we'd expect output to
// end with a height and width of exactly 2.
uint32_t output_shape[] = {1, 2, 2, 1};
// Output values don't really matter as we expect failure status.
float expected_values[] = {input_values[0], input_values[0], input_values[0],
input_values[0]};
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_FUNC_RC_F003,
true, expected_values);
}
void maxpool2d_max_stride_valid_padding_height_fail() {
max_stride_height_fail(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_stride_same_padding_height_fail() {
max_stride_height_fail(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_stride_valid_padding_height_fail() {
max_stride_height_fail(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_stride_same_padding_height_fail() {
max_stride_height_fail(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
* Check that we hit the expected condition code when using just over the
* largest nonzero strides allowed
*/
void max_stride_width_fail(nnpa_function_code function_code,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_stride_max is a valid tensor dimension size but is too large for a
// stride. This should lead to a condition code from the zAIU. If not, update
// the test constant and the API documentation to the new value.
uint32_t over_stride_max = MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE + 1;
  // Use 2 * X here to make determining expected shape and values easier.
uint32_t input_shape[] = {1, 2 * MAXIMUM_POOL_NONZERO_STRIDES_STRIDE_SIZE,
2 * over_stride_max, 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
// Input pooling arguments
uint32_t kernel_height = input_shape[1] / 2;
uint32_t kernel_width = input_shape[2] / 2;
uint32_t stride_height = input_shape[1] / 2;
uint32_t stride_width = input_shape[2] / 2;
// With stride and kernel set to exactly 1/2 of input, we'd expect output to
// end with a height and width of exactly 2.
uint32_t output_shape[] = {1, 2, 2, 1};
// Output values don't really matter as we expect failure status.
float expected_values[] = {input_values[0], input_values[0], input_values[0],
input_values[0]};
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, output_shape, layout, ZDNN_FUNC_RC_F003,
true, expected_values);
}
void maxpool2d_max_stride_valid_padding_width_fail() {
max_stride_width_fail(NNPA_MAXPOOL2D, VALID_PADDING);
}
void maxpool2d_max_stride_same_padding_width_fail() {
max_stride_width_fail(NNPA_MAXPOOL2D, SAME_PADDING);
}
void avgpool2d_max_stride_valid_padding_width_fail() {
max_stride_width_fail(NNPA_AVGPOOL2D, VALID_PADDING);
}
void avgpool2d_max_stride_same_padding_width_fail() {
max_stride_width_fail(NNPA_AVGPOOL2D, SAME_PADDING);
}
/*
* Check that we hit the expected condition code when using just over the
* largest input height/width allowed when strides are non-zero
*/
void nonzero_strides_bad_height_or_width_fail(nnpa_function_code function_code,
bool bad_height, bool bad_width,
zdnn_pool_padding padding_type) {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {
1, MAXIMUM_POOL_NONZERO_STRIDES_HEIGHT_WIDTH + (bad_height ? 1 : 0),
MAXIMUM_POOL_NONZERO_STRIDES_HEIGHT_WIDTH + (bad_width ? 1 : 0), 1};
  // Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
uint32_t kernel_height = 1;
uint32_t kernel_width = 1;
uint32_t stride_height = 1;
uint32_t stride_width = 1;
// when kernel height/width and stride height/width are all 1, output shape is
// same as input's
// Output values don't really matter as we expect failure status.
test_pool_function(function_code, input_shape, layout, true, input_values,
padding_type, kernel_height, kernel_width, stride_height,
stride_width, input_shape, layout, ZDNN_FUNC_RC_F004, true,
ZERO_ARRAY);
}
void maxpool2d_non_zero_strides_valid_padding_height_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_MAXPOOL2D, true, false,
VALID_PADDING);
}
void maxpool2d_non_zero_strides_same_padding_height_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_MAXPOOL2D, true, false,
SAME_PADDING);
}
void avgpool2d_non_zero_strides_valid_padding_height_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_AVGPOOL2D, true, false,
VALID_PADDING);
}
void avgpool2d_non_zero_strides_same_padding_height_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_AVGPOOL2D, true, false,
SAME_PADDING);
}
void maxpool2d_non_zero_strides_valid_padding_width_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_MAXPOOL2D, false, true,
VALID_PADDING);
}
void maxpool2d_non_zero_strides_same_padding_width_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_MAXPOOL2D, false, true,
SAME_PADDING);
}
void avgpool2d_non_zero_strides_valid_padding_width_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_AVGPOOL2D, false, true,
VALID_PADDING);
}
void avgpool2d_non_zero_strides_same_padding_width_fail() {
nonzero_strides_bad_height_or_width_fail(NNPA_AVGPOOL2D, false, true,
SAME_PADDING);
}
int main(int argc, char *argv[]) {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(maxpool2d_same_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(maxpool2d_valid_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(avgpool2d_same_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(avgpool2d_valid_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(maxpool2d_zero_strides);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(avgpool2d_zero_strides);
// Tests to confirm we get the expected condition codes from the NNPA.
// Technically these don't test our library. However we document these
// in our API. These tests should fail if hardware changes the underlying
// conditions meaning we need to update our documentation (and tests).
{
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(maxpool2d_unexpected_padding_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(avgpool2d_unexpected_padding_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_zero_strides_max_kernel_dims_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_zero_strides_max_kernel_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_zero_strides_max_kernel_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_zero_strides_max_kernel_dims_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_zero_strides_max_kernel_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_zero_strides_max_kernel_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_valid_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_same_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_valid_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_same_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_kernel_same_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_kernel_same_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_valid_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_same_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_valid_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_same_padding_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_max_stride_same_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_max_stride_same_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_non_zero_strides_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_non_zero_strides_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_non_zero_strides_valid_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_non_zero_strides_same_padding_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_non_zero_strides_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
maxpool2d_non_zero_strides_same_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_non_zero_strides_valid_padding_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
avgpool2d_non_zero_strides_same_padding_width_fail);
}
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_batchnorm.c 0000664 0000000 0000000 00000016210 15000221702 0021040 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <string.h>
#include "testsupport.h"
void setUp(void) {
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
}
void tearDown(void) {}
/**
* Helper function to compute expected output tensor from randomly generated
* test input arrays.
*
* | input_a | input_b | input_c | result |
* | (n, h, w, c) | (c) | (c) | (n, h, w, c) |
*
* formula: output(*, *, *, c) = input_a(*, *, *, c) * input_b(c) + input_c(c)
*
*/
void gen_test_expected_fp32_array(uint32_t *shape, zdnn_data_types type,
float *input_a, float *input_b,
float *input_c, float *result) {
uint32_t c = shape[3];
for (uint64_t i = 0; i < (uint64_t)shape[0] * shape[1] * shape[2] * c; i++) {
float cleansed_input_a = 0;
float cleansed_input_b = 0;
float cleansed_input_c = 0;
switch (type) {
case (BFLOAT):
cleansed_input_a = CLEANSE_BFLOAT(input_a[i]);
cleansed_input_b = CLEANSE_BFLOAT(input_b[i % c]);
cleansed_input_c = CLEANSE_BFLOAT(input_c[i % c]);
break;
case (FP16):
cleansed_input_a = CLEANSE_FP16(input_a[i]);
cleansed_input_b = CLEANSE_FP16(input_b[i % c]);
cleansed_input_c = CLEANSE_FP16(input_c[i % c]);
break;
case (FP32):
cleansed_input_a = CLEANSE_FP32(input_a[i]);
cleansed_input_b = CLEANSE_FP32(input_b[i % c]);
cleansed_input_c = CLEANSE_FP32(input_c[i % c]);
break;
default:
break;
}
result[i] = cleansed_input_a * cleansed_input_b + cleansed_input_c;
}
}
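// Tiny worked example of the broadcast above (made-up numbers): with c = 2,
// input_a = {1, 10}, input_b = {0.5, 2} and input_c = {1, 3}, element 0 maps
// to 1 * 0.5 + 1 = 1.5 and element 1 to 10 * 2 + 3 = 23; because input_b and
// input_c are indexed with i % c, the same scale/shift pair repeats for every
// (n, h, w) position.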
void do_test(uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_c_shape, uint32_t *output_shape,
zdnn_data_types dtype, float *input_a_values,
float *input_b_values, float *input_c_values,
zdnn_status expected_status, float *expected_values) {
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, ZDNN_NHWC, dtype, NO_CONCAT, false, input_a_values);
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, ZDNN_1D, dtype, NO_CONCAT, false, input_b_values);
zdnn_ztensor *input_c_ztensor = alloc_ztensor_with_values(
input_c_shape, ZDNN_1D, dtype, NO_CONCAT, false, input_c_values);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, ZDNN_NHWC, dtype, NO_CONCAT, true, ZERO_ARRAY);
// Call public NNPA method
zdnn_status status = zdnn_batchnorm(input_a_ztensor, input_b_ztensor,
input_c_ztensor, output_ztensor);
// Assert returned status matches expected
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_batchnorm to returned status %08x but expected "
"%08x\n",
status, expected_status);
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
  default:
    // should never get here
    break;
}
// If expected status is ZDNN_OK, assert output values matches expected
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// Cleanup test ztensors
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_c_ztensor,
output_ztensor);
}
void zdnn_batchnorm_small_values() {
uint32_t shape[] = {1, 3, 3, 2};
float input_a_values[] = {0.1, 1, 0.2, 2, 0.3, 3, 0.4, 4, 0.5,
5, 0.6, 6, 0.7, 7, 0.8, 8, 0.9, 9};
uint32_t input_b_shape[] = {2};
float input_b_values[] = {0.45, 0.55};
uint32_t input_c_shape[] = {2};
float input_c_values[] = {0.75, 0.45};
float output_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0};
gen_test_expected_fp32_array(shape, test_datatype, input_a_values,
input_b_values, input_c_values, output_values);
do_test(shape, input_b_shape, input_c_shape, shape, test_datatype,
input_a_values, input_b_values, input_c_values, ZDNN_OK,
output_values);
}
void zdnn_batchnorm_high_values() {
uint32_t shape[] = {1, 3, 3, 2};
float input_a_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
uint32_t input_b_shape[] = {2};
float input_b_values[] = {4.5, 5.5};
uint32_t input_c_shape[] = {2};
float input_c_values[] = {7.5, 4.5};
float output_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0};
gen_test_expected_fp32_array(shape, test_datatype, input_a_values,
input_b_values, input_c_values, output_values);
do_test(shape, input_b_shape, input_c_shape, shape, test_datatype,
input_a_values, input_b_values, input_c_values, ZDNN_OK,
output_values);
}
void test_batchnorm_random_values(uint32_t n, uint32_t h, uint32_t w,
uint32_t c) {
uint32_t shape[] = {n, h, w, c};
uint64_t num_values = (uint64_t)n * h * w * c;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
uint32_t input_b_shape[] = {c};
float input_b_values[c];
gen_random_float_array_pos_neg(c, input_b_values);
uint32_t input_c_shape[] = {c};
float input_c_values[c];
gen_random_float_array_pos_neg(c, input_c_values);
float output_values[num_values];
gen_test_expected_fp32_array(shape, test_datatype, input_a_values,
input_b_values, input_c_values, output_values);
do_test(shape, input_b_shape, input_c_shape, shape, test_datatype,
input_a_values, input_b_values, input_c_values, ZDNN_OK,
output_values);
}
void zdnn_batchnorm_random_values_low_dims() {
test_batchnorm_random_values(2, 3, 4, 5);
}
void zdnn_batchnorm_random_values_high_dims() {
test_batchnorm_random_values(2, 3, 4, 100);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_batchnorm_small_values);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_batchnorm_high_values);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_batchnorm_random_values_low_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_batchnorm_random_values_high_dims);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_conv2d.c 0000664 0000000 0000000 00001510704 15000221702 0020266 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <string.h>
#include "testsupport.h"
void setUp(void) {
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
}
void tearDown(void) {}
// convenience debug macro
#define PRINT_DIMS(x) \
printf(#x " pre: %u %u %u %u\n", (x)->pre_transformed_desc->dim4, \
(x)->pre_transformed_desc->dim3, (x)->pre_transformed_desc->dim2, \
(x)->pre_transformed_desc->dim1); \
printf(#x ": %u %u %u %u\n", (x)->transformed_desc->dim4, \
(x)->transformed_desc->dim3, (x)->transformed_desc->dim2, \
(x)->transformed_desc->dim1);
typedef struct input_set {
uint32_t n;
uint32_t height_in;
uint32_t width_in;
uint32_t channel_in;
uint32_t kernel_size[2];
uint32_t channel_out;
} input_set;
typedef struct strides_input_set {
uint32_t height;
uint32_t width;
} strides_input_set;
void test_conv2d(input_set *set, strides_input_set *strides, void *input_vals,
void *kernel_vals, void *bias_vals, void *output_exp_vals,
void *output_relu_exp_vals, zdnn_pool_padding padding,
void *clipping_value) {
zdnn_status status;
/****************************************************************************
ZDNN dims requirements
if stride_height > 0, stride_width > 0, padding = SAME
input: (n, x, y, z)
kernel: (i, j, z, a)
bias: (a)
output: (n, e, f, a) (e = ceil(x/stride_height), f = ceil(y/stride_width))
if stride_height > 0, stride_width > 0, padding = VALID
input: (n, x, y, z) (x > i, y > j)
kernel: (i, j, z, a)
bias: (a)
output: (n, e, f, a) (e = ceil((x - i + 1)/stride_height),
f = ceil((y - j + 1)/stride_width))
if stride_height = 0, stride_width = 0, padding = VALID
input: (n, x, y, z)
kernel: (x, y, z, a)
bias: (a)
output: (n, 1, 1, a)
n = n
x = height_in
y = width_in
z = channel_in
(i, j) = kernel_size
a = channel_out
(stride_height, stride_width) = strides
****************************************************************************/
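  // A couple of concrete shape calculations under the rules above (numbers are
  // illustrative only): with SAME padding, height_in = 10 and stride_height = 2
  // give e = ceil(10 / 2) = 5; with VALID padding, height_in = 10, kernel i = 3
  // and stride_height = 2 give e = ceil((10 - 3 + 1) / 2) = 4.  The CEIL()
  // calls below implement exactly these formulas.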
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {0, 0, // 0s are placeholders
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 0, 0,
set->channel_out}; // 0s are placeholders
  // zero-strides + VALID_PADDING is a special case, so ignore kernel_size[0] &
// [1] and set kernel_dims[0] & [1] to what zAIU wants
if (padding == VALID_PADDING && strides->height == 0 && strides->width == 0) {
kernel_dims[0] = set->height_in;
kernel_dims[1] = set->width_in;
} else {
kernel_dims[0] = set->kernel_size[0];
kernel_dims[1] = set->kernel_size[1];
}
// output_dims[1] & [2] are exactly what the zAIU wants
if (padding == VALID_PADDING) {
// output dim3
output_dims[1] =
(strides->height == 0 && strides->width == 0)
? 1
: CEIL((set->height_in - kernel_dims[0] + 1), strides->height);
// output dim2
output_dims[2] =
(strides->height == 0 && strides->width == 0)
? 1
: CEIL((set->width_in - kernel_dims[1] + 1), strides->width);
} else {
    // no need to bother with a switch statement, it's got to be SAME_PADDING
output_dims[1] = CEIL(set->height_in, strides->height); // output dim3
output_dims[2] = CEIL(set->width_in, strides->width); // output dim2
}
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, false, input_vals);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, false, kernel_vals);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, false, bias_vals);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_relu_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
BEGIN_BLOCK_IF_LOGLEVEL_TRACE {
PRINT_DIMS(input_ztensor);
PRINT_DIMS(kernel_ztensor);
PRINT_DIMS(bias_ztensor);
PRINT_DIMS(output_ztensor);
PRINT_DIMS(output_relu_ztensor);
printf("strides = height %u width %u\n", strides->height, strides->width);
}
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE,
clipping_value, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_conv2d() (CONV2D_ACT_NONE) failed, status = %08x", status);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_RELU,
clipping_value, output_relu_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_conv2d() (CONV2D_ACT_RELU) failed, status = %08x", status);
BEGIN_BLOCK_IF_LOGLEVEL_TRACE {
dumpdata_ztensor(input_ztensor, AS_FLOAT, true);
dumpdata_ztensor(kernel_ztensor, AS_FLOAT, true);
dumpdata_ztensor(bias_ztensor, AS_FLOAT, true);
dumpdata_ztensor(output_ztensor, AS_FLOAT, true);
dumpdata_ztensor(output_relu_ztensor, AS_FLOAT, true);
}
switch (output_ztensor->pre_transformed_desc->type) {
case (BFLOAT):
assert_ztensor_values_adv(output_ztensor, false, output_exp_vals,
tol_bfloat);
assert_ztensor_values_adv(output_relu_ztensor, false, output_relu_exp_vals,
tol_bfloat);
break;
case (FP16):
assert_ztensor_values_adv(output_ztensor, false, output_exp_vals, tol_fp16);
assert_ztensor_values_adv(output_relu_ztensor, false, output_relu_exp_vals,
tol_fp16);
break;
case (FP32):
assert_ztensor_values_adv(output_ztensor, false, output_exp_vals, tol_fp32);
assert_ztensor_values_adv(output_relu_ztensor, false, output_relu_exp_vals,
tol_fp32);
break;
default:
break;
}
free_ztensor_buffers(5, input_ztensor, kernel_ztensor, bias_ztensor,
output_ztensor, output_relu_ztensor);
}
/*******************************************************************
small:
n = 1
height_in = 4
width_in = 3
channel_in = 5
kernel_size = (2, 2)
channel_out = 2
medium:
n = 3
height_in = 10
width_in = 8
channel_in = 5
kernel_size = (2, 3)
channel_out = 5
large:
n = 4
height_in = 15
width_in = 10
channel_in = 6
kernel_size = (4, 6)
channel_out = 7
non-zero strides:
small: (1, 1)
medium: (2, 3)
large: (3, 4)
*******************************************************************/
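// For reference, the spatial output shapes implied by the formulas in
// test_conv2d() above (derived by hand, assuming CEIL is ceiling division):
//   small:  VALID, strides (1,1) -> 3x2; SAME, strides (1,1) -> 4x3; zero strides -> 1x1
//   medium: VALID, strides (2,3) -> 5x2
//   large:  VALID, strides (3,4) -> 4x2
// The small and medium shapes match the "// 1,3,2,2", "// 1,4,3,2" and
// "// 3,5,2,5" comments next to the expected-output arrays below.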
input_set small_input = {1, 4, 3, 5, {2, 2}, 2};
input_set medium_input = {3, 10, 8, 5, {2, 3}, 5};
input_set large_input = {4, 15, 10, 6, {4, 6}, 7};
strides_input_set small_non0_strides = {1, 1};
strides_input_set medium_non0_strides = {2, 3};
strides_input_set large_non0_strides = {3, 4};
strides_input_set zero_strides = {0, 0};
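// Note: zero strides are a special case handled in test_conv2d() above: the
// kernel dims are forced to height_in x width_in and the spatial output
// collapses to 1x1, as reflected in the "// 1,1,1,2" expected-output shape
// comments in the zero-stride test cases below.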
void test_valid_padding_non_zero_strides_small() {
input_set *set = &small_input;
strides_input_set *strides = &small_non0_strides;
// 1,4,3,5
uint32_t input_vals[] = {
0x3f15a7d8, 0x3f55d27f, 0x3f04e322, 0x3f0bc669, 0x3f2eec77, 0x3d07cb10,
0x3e528880, 0x3f1da880, 0x3e3fb0a8, 0x3f40bb0b, 0x3e0e0bcc, 0x3f17da1f,
0x3f4f6880, 0x3d0b5ba0, 0x3f5ba1ae, 0x3e780c74, 0x3de61650, 0x3f7ae7a4,
0x3f71ba1f, 0x3f2fdc52, 0x3f50c293, 0x3e1d23d0, 0x3deae1a8, 0x3f615378,
0x3ba82d80, 0x3f4b0c93, 0x3e77825c, 0x3ea22f0a, 0x3aa20200, 0x3e33de00,
0x3e8e771c, 0x3f39eaa3, 0x3f324e26, 0x3f17f541, 0x3f3fe98e, 0x3ef6c34e,
0x3f3379fe, 0x3f6a0de8, 0x3ed4dfce, 0x3f1aca63, 0x3f51dd20, 0x3e50b72c,
0x3f6f62f4, 0x3ed5df52, 0x3de131a8, 0x3f7f3fc1, 0x3f26ab72, 0x3f70f111,
0x3e8ad072, 0x3e592e30, 0x3f32cd09, 0x3f4644b7, 0x3f19794f, 0x3f313923,
0x3f786a79, 0x3f114ab9, 0x3edfb038, 0x3d858c20, 0x3e50bd98, 0x3f563faa,
};
// 2,2,5,2
uint32_t kernel_vals[] = {
0xbadb6d80, 0x3da65588, 0xbdb06154, 0xbd13a074, 0xbca28f68, 0xbe1025bd,
0xbe5abfd6, 0xbe146a76, 0x3db50ae0, 0xbe05a938, 0x3df5a9a0, 0x3e3e2300,
0x3d759a80, 0x3cc1d430, 0x3d725b60, 0xbd67df54, 0xbe17a51c, 0xbdd2ced4,
0x3e0963a4, 0x3e44d336, 0xbe3c7496, 0xbd3c7db4, 0xbb912ca0, 0x3e4f5476,
0xbd8a65c2, 0xbdb281f2, 0xbdbcc6b6, 0xbe1ff856, 0xbe1b3afe, 0x3dcc5820,
0xbe2882ac, 0x3e2b57ec, 0x3e358cf2, 0xbe54696c, 0xbd340870, 0x3e45d54a,
0x3c07b640, 0xbe567290, 0xbdc76b34, 0x3dddf448,
};
// 2
uint32_t bias_vals[] = {
0x3c40ac50,
0xbdeac81f,
};
// 1,3,2,2
uint32_t output_exp_vals[] = {
0xbed199d0, 0xbee4ca5f, 0xbe0febf4, 0xbe2b0406, 0xbefd7c85, 0xbe846eb2,
0xbef1750b, 0x3e52c1ce, 0xbeb43421, 0xbe83d2bf, 0xbeeeaca6, 0xbe243821,
};
// 1,3,2,2
uint32_t output_relu_exp_vals[] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3e52c1ce, 0x0, 0x0, 0x0, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
void test_valid_padding_non_zero_strides_small_with_clip() {
input_set *set = &small_input;
strides_input_set *strides = &small_non0_strides;
// 1,4,3,5
uint32_t input_vals[] = {
0x3f15a7d8, 0x3f55d27f, 0x3f04e322, 0x3f0bc669, 0x3f2eec77, 0x3d07cb10,
0x3e528880, 0x3f1da880, 0x3e3fb0a8, 0x3f40bb0b, 0x3e0e0bcc, 0x3f17da1f,
0x3f4f6880, 0x3d0b5ba0, 0x3f5ba1ae, 0x3e780c74, 0x3de61650, 0x3f7ae7a4,
0x3f71ba1f, 0x3f2fdc52, 0x3f50c293, 0x3e1d23d0, 0x3deae1a8, 0x3f615378,
0x3ba82d80, 0x3f4b0c93, 0x3e77825c, 0x3ea22f0a, 0x3aa20200, 0x3e33de00,
0x3e8e771c, 0x3f39eaa3, 0x3f324e26, 0x3f17f541, 0x3f3fe98e, 0x3ef6c34e,
0x3f3379fe, 0x3f6a0de8, 0x3ed4dfce, 0x3f1aca63, 0x3f51dd20, 0x3e50b72c,
0x3f6f62f4, 0x3ed5df52, 0x3de131a8, 0x3f7f3fc1, 0x3f26ab72, 0x3f70f111,
0x3e8ad072, 0x3e592e30, 0x3f32cd09, 0x3f4644b7, 0x3f19794f, 0x3f313923,
0x3f786a79, 0x3f114ab9, 0x3edfb038, 0x3d858c20, 0x3e50bd98, 0x3f563faa,
};
// 2,2,5,2
uint32_t kernel_vals[] = {
0xbadb6d80, 0x3da65588, 0xbdb06154, 0xbd13a074, 0xbca28f68, 0xbe1025bd,
0xbe5abfd6, 0xbe146a76, 0x3db50ae0, 0xbe05a938, 0x3df5a9a0, 0x3e3e2300,
0x3d759a80, 0x3cc1d430, 0x3d725b60, 0xbd67df54, 0xbe17a51c, 0xbdd2ced4,
0x3e0963a4, 0x3e44d336, 0xbe3c7496, 0xbd3c7db4, 0xbb912ca0, 0x3e4f5476,
0xbd8a65c2, 0xbdb281f2, 0xbdbcc6b6, 0xbe1ff856, 0xbe1b3afe, 0x3dcc5820,
0xbe2882ac, 0x3e2b57ec, 0x3e358cf2, 0xbe54696c, 0xbd340870, 0x3e45d54a,
0x3c07b640, 0xbe567290, 0xbdc76b34, 0x3dddf448,
};
// 2
uint32_t bias_vals[] = {
0x3c40ac50,
0xbdeac81f,
};
// 1,3,2,2
uint32_t output_exp_vals[] = {
0xbed199d0, 0xbee4ca5f, 0xbe0febf4, 0xbe2b0406, 0xbefd7c85, 0xbe846eb2,
0xbef1750b, 0x3e52c1ce, 0xbeb43421, 0xbe83d2bf, 0xbeeeaca6, 0xbe243821,
};
uint32_t clip_value = 0x3e4ccccd;
// 1,3,2,2
uint32_t output_relu_exp_vals[] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3e4ccccd, 0x0, 0x0, 0x0, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, (void *)&clip_value);
}
void test_valid_padding_zero_strides_small() {
input_set *set = &small_input;
strides_input_set *strides = &zero_strides;
// 1,4,3,5
uint32_t input_vals[] = {
0x3e83171a, 0x3f5ba100, 0x3f5727e7, 0x3ec9e104, 0x3f7090d1, 0x3f3c9175,
0x3ee5aee2, 0x3efe572a, 0x3e9832f2, 0x3e979464, 0x3ca6eaa0, 0x3f038eb2,
0x3e80c192, 0x3eceb0c4, 0x3f73959b, 0x3e5088c0, 0x3ee9ea48, 0x3def4bf8,
0x3ecdbc7c, 0x3f2e5b0e, 0x3ebfea62, 0x3f19e7a2, 0x3d036000, 0x3d523a30,
0x3d47c290, 0x3e834c8a, 0x3f516330, 0x3d9fbc50, 0x3d8bd6f0, 0x3f50fc3a,
0x3ea67152, 0x3f793f5d, 0x3ecc12e4, 0x3ed7f0f6, 0x3ee27582, 0x3db8b518,
0x3f514983, 0x3f46322b, 0x3f01a7c3, 0x3e8db848, 0x3ec98b80, 0x3f798662,
0x3f4533ba, 0x3e8bb4a4, 0x3f665b0a, 0x3f1b5c5f, 0x3eed7c54, 0x3c91afe0,
0x3f3037ae, 0x3dc482b0, 0x3e327314, 0x3d503cd0, 0x3f4fd817, 0x3d4cae30,
0x3f0dbc2b, 0x3ef2dcbc, 0x3e86a2e0, 0x3ddeb5d8, 0x3eeeb928, 0x3eca66d0,
};
// 4,3,5,2
uint32_t kernel_vals[] = {
0xba7d6000, 0x3d6f4388, 0xbdfc970a, 0x3d4fd1fc, 0xbcaa76f8, 0x3cc5f330,
0xbd9a56a4, 0x3d840448, 0xbd6a842e, 0xbbe916f0, 0x3d8dd556, 0x3d54a8b0,
0xbdaf1ac2, 0x3ddb109a, 0x3c5fd070, 0xbcb22910, 0x3de34624, 0x3cdd5a80,
0xbb98a3f0, 0x3c9eca70, 0xbdd99c0c, 0xbd9c9d8d, 0xbd59fae2, 0x3de6143c,
0x3def676c, 0xbcae097c, 0x3d6bf4d8, 0xbdce4036, 0x3da918da, 0xbd588e5a,
0xbdc2943f, 0x3d1fc068, 0x3b9cb4e0, 0x3de94a92, 0xbdf545a3, 0xbd720ed0,
0x3d802534, 0xbd983e82, 0x3e036c17, 0x3d9ce2f0, 0xbd4baa92, 0x3d912554,
0x3d510cc0, 0x3cd27970, 0xbda67275, 0x3d4b5998, 0x3bde7ac0, 0x3df8bae2,
0x3d51de2c, 0xbda0e525, 0x3d0dcca0, 0xbc857c3c, 0x3d9ea4b8, 0xbde5e9f4,
0xbd05df20, 0xbd9c9a3c, 0xbd878d22, 0x3db2278e, 0x3c8edf30, 0x3defe79c,
0xbb27a200, 0x3ba18540, 0xbdb33ea4, 0x3c429450, 0xbd4e1f72, 0x3d41d9b0,
0x3cb2ed40, 0xbd6690fa, 0x3db14610, 0x3db530b8, 0x3dd1a2f4, 0x3cf50c00,
0xbd6644da, 0xbda56848, 0x3de47024, 0x3d85b500, 0xbdf6dd66, 0xbd0516b6,
0xbd9bad42, 0xbddef5da, 0xbc3bb558, 0x3da4065e, 0x3d4010e8, 0xbdf66796,
0xbdab603a, 0x3dbd45fa, 0xbdbf0b1b, 0xbdfbdce1, 0x3cc51300, 0xbd039b9e,
0x3d0becc0, 0x3dfddf06, 0x3ddb8d0c, 0x3deae9de, 0xbd736b86, 0x3d45b890,
0xbd884dcc, 0xbb1812c0, 0xbdccfcc7, 0x3bbee240, 0xbd1fcec2, 0xbdcbbfe1,
0xbcd9a694, 0x3dc9092c, 0xbdb8b7ae, 0xbe02d742, 0x3da6d138, 0xbdc71b02,
0xbdb73fbc, 0x3c9a4d70, 0xbccfe258, 0xbd41c938, 0x3dc5d9b2, 0xbe03defc,
0xbdf79f67, 0xbdab833c, 0x3d9b0552, 0xbdcbf180, 0xbd88951c, 0x3cfad4d8,
};
// 2
uint32_t bias_vals[] = {
0x3d4f8340,
0x3bfd65c0,
};
// 1,1,1,2
uint32_t output_exp_vals[] = {
0xbd56b2c9,
0x3dd5aea1,
};
// 1,1,1,2
uint32_t output_relu_exp_vals[] = {
0x0,
0x3dd5aea1,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
void test_same_padding_non_zero_strides_small() {
input_set *set = &small_input;
strides_input_set *strides = &small_non0_strides;
// 1,4,3,5
uint32_t input_vals[] = {
0x3f15a7d8, 0x3f55d27f, 0x3f04e322, 0x3f0bc669, 0x3f2eec77, 0x3d07cb10,
0x3e528880, 0x3f1da880, 0x3e3fb0a8, 0x3f40bb0b, 0x3e0e0bcc, 0x3f17da1f,
0x3f4f6880, 0x3d0b5ba0, 0x3f5ba1ae, 0x3e780c74, 0x3de61650, 0x3f7ae7a4,
0x3f71ba1f, 0x3f2fdc52, 0x3f50c293, 0x3e1d23d0, 0x3deae1a8, 0x3f615378,
0x3ba82d80, 0x3f4b0c93, 0x3e77825c, 0x3ea22f0a, 0x3aa20200, 0x3e33de00,
0x3e8e771c, 0x3f39eaa3, 0x3f324e26, 0x3f17f541, 0x3f3fe98e, 0x3ef6c34e,
0x3f3379fe, 0x3f6a0de8, 0x3ed4dfce, 0x3f1aca63, 0x3f51dd20, 0x3e50b72c,
0x3f6f62f4, 0x3ed5df52, 0x3de131a8, 0x3f7f3fc1, 0x3f26ab72, 0x3f70f111,
0x3e8ad072, 0x3e592e30, 0x3f32cd09, 0x3f4644b7, 0x3f19794f, 0x3f313923,
0x3f786a79, 0x3f114ab9, 0x3edfb038, 0x3d858c20, 0x3e50bd98, 0x3f563faa,
};
// 2,2,5,2
uint32_t kernel_vals[] = {
0xbadb6d80, 0x3da65588, 0xbdb06154, 0xbd13a074, 0xbca28f68, 0xbe1025bd,
0xbe5abfd6, 0xbe146a76, 0x3db50ae0, 0xbe05a938, 0x3df5a9a0, 0x3e3e2300,
0x3d759a80, 0x3cc1d430, 0x3d725b60, 0xbd67df54, 0xbe17a51c, 0xbdd2ced4,
0x3e0963a4, 0x3e44d336, 0xbe3c7496, 0xbd3c7db4, 0xbb912ca0, 0x3e4f5476,
0xbd8a65c2, 0xbdb281f2, 0xbdbcc6b6, 0xbe1ff856, 0xbe1b3afe, 0x3dcc5820,
0xbe2882ac, 0x3e2b57ec, 0x3e358cf2, 0xbe54696c, 0xbd340870, 0x3e45d54a,
0x3c07b640, 0xbe567290, 0xbdc76b34, 0x3dddf448,
};
// 2
uint32_t bias_vals[] = {
0x3c40ac50,
0xbdeac81f,
};
// 1,4,3,2
uint32_t output_exp_vals[] = {
0xbed199d0, 0xbee4ca5f, 0xbe0febf4, 0xbe2b0406, 0xbe3aa069, 0xbeb5053e,
0xbefd7c85, 0xbe846eb2, 0xbef1750b, 0x3e52c1ce, 0xbe8b07f1, 0xbe848b7f,
0xbeb43421, 0xbe83d2bf, 0xbeeeaca6, 0xbe243821, 0xbeb92f25, 0xbe1d4e2d,
0x3db760f2, 0xbcdf50da, 0x3d4d4b44, 0xbe14cbd2, 0x3b1861e5, 0xbe6dd479,
};
// 1,4,3,2
uint32_t output_relu_exp_vals[] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x3e52c1ce, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x3db760f2, 0x0, 0x3d4d4b44, 0x0, 0x3b1861e5, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, SAME_PADDING, NULL);
}
void test_valid_padding_non_zero_strides_medium() {
input_set *set = &medium_input;
strides_input_set *strides = &medium_non0_strides;
// 3,10,8,5
uint32_t input_vals[] = {
0x3f1c5cd1, 0x3f791683, 0x3f2c91ac, 0x3f4e9dfc, 0x3f7ea33e, 0x3ec54b8e,
0x3f1e4b8c, 0x3d5833b0, 0x3f0d60ae, 0x3f19e648, 0x3e83f3a8, 0x3c473900,
0x3f1d2739, 0x3f07625d, 0x3eeef18e, 0x3f1195d9, 0x3eb71e74, 0x3e3b6ac8,
0x3e6479c8, 0x3f7dff2d, 0x3f693fc2, 0x3e233e04, 0x3ee4515a, 0x3e949d42,
0x3e92990a, 0x3e25d8f0, 0x3f703971, 0x3f1076e9, 0x3eb52618, 0x3eeb7c5e,
0x3f05f190, 0x3ed5b900, 0x3f6d0c84, 0x3c53b800, 0x3e8fe576, 0x3ea19870,
0x3d353270, 0x3e85db72, 0x3f06a6e9, 0x3edca954, 0x3f7d9b72, 0x3eefe830,
0x3f52005d, 0x3f16a350, 0x3f1bd1c3, 0x3cd1dda0, 0x3f5062ba, 0x3edf7ab6,
0x3effc884, 0x3f11188a, 0x3ca952e0, 0x3f214124, 0x3e866b22, 0x3f2955c8,
0x3ecfa5e4, 0x3f7e2118, 0x3f287963, 0x3d844d18, 0x3f7973f3, 0x38a72000,
0x3e3c1010, 0x3f0be345, 0x3d28ec20, 0x3f22087d, 0x3f068ddb, 0x3f188a64,
0x3f2fb886, 0x3f7d0194, 0x3ea24914, 0x3ef86ab2, 0x3ee9e7d8, 0x3ec1a006,
0x3ec0275a, 0x3e96a4e0, 0x3f134183, 0x3eca1262, 0x3e9a29c4, 0x3f0664b9,
0x3e386b68, 0x3f62147d, 0x3ec6d2ae, 0x3d05b790, 0x3f22b3cf, 0x3e1d126c,
0x3f7c6ace, 0x3f51561e, 0x3dfc9540, 0x3f570177, 0x3ed6c570, 0x3e503d48,
0x3f061e36, 0x3f3779bc, 0x3f6d3ea4, 0x3ed2b8f0, 0x3ef0098a, 0x3c582640,
0x3e50d684, 0x3f67cf69, 0x3f38f141, 0x3e996d74, 0x3e51bc50, 0x3f125d1c,
0x3e04aaf8, 0x3e929380, 0x3d207840, 0x3ea8bac2, 0x3f28db44, 0x3ed6fc08,
0x3f660ac5, 0x3e0b7044, 0x3f406965, 0x3f09101a, 0x3e5107c4, 0x3e18bd60,
0x3f7822c5, 0x3e34aae4, 0x3e340b80, 0x3e5b77c8, 0x3f005d22, 0x3f4aa1bf,
0x3f78b197, 0x3f7a68f8, 0x3f1e5ccd, 0x3f730fcf, 0x3f44c15e, 0x3ec6fb36,
0x3e5645b4, 0x3f78259e, 0x3f7f4803, 0x3f472298, 0x3ed2083a, 0x3f6947b3,
0x3dcbab68, 0x3e576af0, 0x3dd27418, 0x3f644ba1, 0x3dd16648, 0x3f4d7757,
0x3f16ccee, 0x3f0d7145, 0x3f405573, 0x3ec26b6a, 0x3e768b88, 0x3f0f1ea5,
0x3f7721a8, 0x3f6c8f35, 0x3f45a0c5, 0x3ece2346, 0x3e87f956, 0x3e624290,
0x3f4a0a77, 0x3e976742, 0x3f659a49, 0x3ea78da6, 0x3f71de3a, 0x3eb25766,
0x3f6b8687, 0x3ec619b4, 0x3f22ab58, 0x3f21a4b1, 0x3e2c4d58, 0x3e1fa9f4,
0x3f0ba7b7, 0x3e1bf918, 0x3f5a0db7, 0x3eecf3ec, 0x3da412f0, 0x3e1a1868,
0x3f151e94, 0x3e925530, 0x3f69f007, 0x3e8c9090, 0x3f6cf576, 0x3f376386,
0x3f3b02a1, 0x3eaa14d2, 0x3f14b0f6, 0x3edee82c, 0x3e9b1e38, 0x3d687920,
0x3d14bf10, 0x3f75db54, 0x3e098730, 0x3f6a5273, 0x3eefbec2, 0x3f347889,
0x3e85c90e, 0x3f257acf, 0x3eef270c, 0x3f2ab1ce, 0x3f7c9ab6, 0x3f2dc6c3,
0x3e13f588, 0x3f3a1342, 0x3f2662f1, 0x3eb71f00, 0x3ebfd1aa, 0x3dd372d0,
0x3f7ec25c, 0x3f6b1c87, 0x3db01a98, 0x3ec87bb6, 0x3f07cc33, 0x3eb1bfb8,
0x3f7dc7b5, 0x3d3e5780, 0x3f5e1e6c, 0x3f657be6, 0x3f458e66, 0x3f7472eb,
0x3f2013a4, 0x3de66a18, 0x3eb78e5c, 0x3eb6c6ca, 0x3f55f02e, 0x3eec9da2,
0x3f0d9866, 0x3f3c3ed0, 0x3eda9af6, 0x3f59fdf0, 0x3e7d7250, 0x3f785f54,
0x3f6dfe3d, 0x3ed49d8a, 0x3edef7e8, 0x3f19df75, 0x3edcd28e, 0x3f54e5bc,
0x3efe68da, 0x3de553f8, 0x3f309bf0, 0x3f636253, 0x3e736cbc, 0x3e9f4a92,
0x3ea0f742, 0x3f65cd03, 0x3eb129be, 0x3ee3eff4, 0x3f1e0064, 0x3ee342a2,
0x3f63607e, 0x3f670900, 0x3eaf70d0, 0x3f04d2b6, 0x3e6b5274, 0x3ed9ae54,
0x3c85d0a0, 0x3dc89400, 0x3f51234d, 0x3f416e0c, 0x3d722410, 0x3edb1ab6,
0x3f000d8a, 0x3ecc2f2a, 0x3f2cee76, 0x3d456140, 0x3ed308d6, 0x3f604fcb,
0x3f0cd5fb, 0x3f14a72e, 0x3f778324, 0x3f297b9e, 0x3f6bd78d, 0x3ea0c53e,
0x3f1eeb76, 0x3f389544, 0x3f3237c9, 0x3f0bf147, 0x3da67288, 0x3f707120,
0x3f37cbd0, 0x3f6247d3, 0x3f1d125e, 0x3f33cc23, 0x3f2e9f22, 0x3d8a0380,
0x3ed9dc0e, 0x3e912c0c, 0x3f1060ed, 0x3d7c0730, 0x3f7687af, 0x3ef5d92a,
0x3f609ead, 0x3e9979e6, 0x3e0c595c, 0x3f79428a, 0x3f57a7e5, 0x3e9579ea,
0x3ea971f4, 0x3f386a7c, 0x3f7390ed, 0x3ebabbb8, 0x3e1c5678, 0x3f217988,
0x3f117199, 0x3da02b40, 0x3f703588, 0x3f13b60f, 0x3ed1b5fc, 0x3f3e6cae,
0x3e9f67bc, 0x3e2b1aac, 0x3f4cb8a9, 0x3f53a519, 0x3a27c800, 0x3e1fdd6c,
0x3ee44c5c, 0x3d498a60, 0x3f074015, 0x3d1df6b0, 0x3f79362b, 0x3ef24000,
0x3f73b535, 0x3f3059ad, 0x3f4e7e34, 0x3e92007e, 0x3f391d68, 0x3f2e0bab,
0x3f37c4e5, 0x3f51dfe9, 0x3e8b1f3e, 0x3f5782b3, 0x3e194948, 0x3ec12fac,
0x3d4326e0, 0x3f430f94, 0x3ed76c4c, 0x3ec8db0e, 0x3f376190, 0x3f309640,
0x3e89938c, 0x3dafa2c8, 0x3f6f0cdc, 0x3f5e7499, 0x3de06a60, 0x3e81ea12,
0x3f3f6c4a, 0x3dee5a08, 0x3f05e3ee, 0x3f60ef76, 0x3ee99b78, 0x3f2649f3,
0x3eb52d52, 0x3f190c77, 0x3f282a49, 0x3ee764b6, 0x3f336ae2, 0x3f29fb42,
0x3f04a6a5, 0x3f7e9092, 0x3de25378, 0x3e4429ec, 0x3d7dc720, 0x3f589f7f,
0x3be58200, 0x3f6ab5a3, 0x3f526157, 0x3f462b5c, 0x3e93eb02, 0x3c98e0e0,
0x3e8f12a6, 0x3f793c60, 0x3f42a14e, 0x3e6fd614, 0x3f568a83, 0x3f2d674c,
0x3f0b1b53, 0x3f4f62e5, 0x3f37c6c6, 0x3f71a430, 0x3f6f5512, 0x3d0711a0,
0x3e39936c, 0x3d84cc60, 0x3f4e6c4c, 0x3f408d71, 0x3f59dace, 0x3f7f7cf6,
0x3f020d5a, 0x3f551925, 0x3f121f4a, 0x3e046080, 0x3e508190, 0x3e8a4494,
0x3da183a0, 0x3f6ce8e2, 0x3f1d6c73, 0x3f7fc99f, 0x3f547bfb, 0x3f5ca5ed,
0x3f1134e1, 0x3f1299c6, 0x3ca51ba0, 0x3d0d3b60, 0x3ce4c600, 0x3e898f56,
0x3e882ec2, 0x3d5f4b20, 0x3f4d9c4e, 0x3f1c986d, 0x3e309f48, 0x3e4020c8,
0x3eccd314, 0x3e98f244, 0x3f0d6465, 0x3e9a5166, 0x3e56e3c8, 0x3f64e98c,
0x3f676a36, 0x3f6d4529, 0x3f16aa31, 0x3f389366, 0x3f5e765d, 0x3c59d440,
0x3e0b9e44, 0x3e9b588e, 0x3eee1028, 0x3e24fd00, 0x3e396a8c, 0x3f29ffd0,
0x3ed77eb4, 0x3ce35560, 0x3f3b759d, 0x3dbee238, 0x3f388f70, 0x3cd5b620,
0x3e58c9d8, 0x3ddccad0, 0x3ed136da, 0x3f218c2b, 0x3ce9fdc0, 0x3f33e880,
0x3d983f38, 0x3f63bf3b, 0x3f7f446e, 0x3f31951d, 0x3ea92bc8, 0x3f272096,
0x3f703270, 0x3f68cdc5, 0x3e93a406, 0x3f58596d, 0x3f01e193, 0x3d848d28,
0x3f6dd176, 0x3e488128, 0x3f653069, 0x3e88930a, 0x3ef4e888, 0x3ee01726,
0x3e9afca8, 0x3f129ec2, 0x3c916480, 0x3f4b21b5, 0x3f6b5aef, 0x3dbacf50,
0x3eebc1c2, 0x3e9ce4a6, 0x3f386c0d, 0x3d0b7160, 0x3dcd6a08, 0x3eaebd18,
0x3f2ac07f, 0x3f68d865, 0x3e6bb5b4, 0x3f4c9777, 0x3ef30e1e, 0x3d185e40,
0x3ef8e626, 0x3ec93f16, 0x3f410e14, 0x3f174c76, 0x3c69c140, 0x3e27fdac,
0x3eb3f416, 0x3f110cd0, 0x3df1d108, 0x3f103d5c, 0x3f54ad08, 0x3f763ba1,
0x3e6850a8, 0x3ec2d12e, 0x3e0f2730, 0x3ea836c0, 0x3f3c5f87, 0x3f2a95f2,
0x3dac3c60, 0x3d7dc9a0, 0x3f4246d1, 0x3f526877, 0x3f05c01c, 0x3f4e8f85,
0x3e9d5de2, 0x3ec0e98a, 0x3f57111a, 0x3e121668, 0x3e39f098, 0x3ed6c8a6,
0x3f36697c, 0x3e0188c0, 0x3f1b8b57, 0x3f74ef01, 0x3d3af8c0, 0x3f732064,
0x3ee2598e, 0x3f748cb4, 0x3ebe0214, 0x3e2a1f98, 0x3f770581, 0x3f12b0ef,
0x3c064c00, 0x3e5ab018, 0x3f086487, 0x3d382fd0, 0x3f214ab6, 0x3b37c200,
0x3f3d7f7d, 0x3ce99680, 0x3e2d5274, 0x3f41d260, 0x3f106666, 0x3f131506,
0x3edc04a0, 0x3e96270a, 0x3e9a22c8, 0x3f606309, 0x3f58a266, 0x3e356674,
0x3e397f40, 0x3eb2cea4, 0x3e487408, 0x3f356fa6, 0x3e1d1000, 0x3f7ad0e6,
0x3f6b1a57, 0x3f1c2d74, 0x3c962960, 0x3f324ed7, 0x3ee963f0, 0x3ebb4b2a,
0x3f59fb6c, 0x3f50f572, 0x3debd678, 0x3e2c5ea0, 0x3e1c57f8, 0x3f2776e9,
0x3ecfcaaa, 0x3df7fa50, 0x3e8e152c, 0x3f56ca02, 0x3f1bb359, 0x3e8edafc,
0x3f48fefc, 0x3e653078, 0x3f56ad08, 0x3f7fb57c, 0x3f514972, 0x3f17ee83,
0x3f3438ff, 0x3eafba14, 0x3f536375, 0x3f5d5ab2, 0x3f74923d, 0x3d82d098,
0x3eccd20a, 0x3e600990, 0x3d77f330, 0x3ebb9286, 0x3d0a7040, 0x3e063498,
0x3f0b105a, 0x3ee8da04, 0x3e845920, 0x3be94a00, 0x3da8dd30, 0x3f416444,
0x3f691d4c, 0x3ee84ee0, 0x3f371b6a, 0x3f302264, 0x3f70e269, 0x3f31929d,
0x3f00a5c9, 0x3f3f4a4a, 0x3e92a382, 0x3b260a00, 0x3f6bd609, 0x3f619e54,
0x3e4201ec, 0x3f65e675, 0x3f45500c, 0x3ea844e6, 0x3b007400, 0x3f1fbdc3,
0x3dcc11d8, 0x3eb5acd0, 0x3e658f20, 0x3eedc5ca, 0x3f5e7cd6, 0x3edc9d48,
0x3c2a3840, 0x3f6e1185, 0x3f389eff, 0x3f12993c, 0x3f02c385, 0x3f1c69a5,
0x3f355a74, 0x3e660ce0, 0x3ec9d5ca, 0x3f41c905, 0x3e69429c, 0x3f7ae0b5,
0x3e7bd4f8, 0x3f0974fa, 0x3f679826, 0x3f1223a0, 0x3e9511d6, 0x3f3ddbb8,
0x3ee48cba, 0x3f56aea3, 0x3e3395e8, 0x3e374304, 0x3cab4600, 0x3f4072a9,
0x3f385858, 0x3dfd6908, 0x3ee1fe56, 0x3f1e5491, 0x3e2458a0, 0x3e8a7f62,
0x3eea4734, 0x3cef49e0, 0x3de73ba8, 0x3f40e8bc, 0x3e125b3c, 0x3eaa00c6,
0x3f322c3e, 0x3f3f9fb9, 0x3f4824d1, 0x3d57d540, 0x3f4adcd9, 0x3f5e694c,
0x3d947048, 0x3e49cacc, 0x3f065972, 0x3f0b0932, 0x3f1eba2f, 0x3f4eb1ec,
0x3f688528, 0x3f7c2509, 0x3f63e958, 0x3ee6b498, 0x3cda97c0, 0x3f7db83b,
0x3ecf5fee, 0x3f17fcd2, 0x3f3f7695, 0x3da34278, 0x3e9b13d4, 0x3ef53908,
0x3eadae28, 0x3e3b01e0, 0x3f01e2e6, 0x3eb84bbc, 0x3e0beec4, 0x3f56b2de,
0x3f7d3712, 0x3f64dcce, 0x3f3fca1d, 0x3f274c18, 0x3f06fe9c, 0x3b395900,
0x3d930d08, 0x3dddc708, 0x3f51265e, 0x3f180648, 0x3cac41a0, 0x3e8dcf08,
0x3f329e5e, 0x3be6c100, 0x3eadaa24, 0x3ec381bc, 0x3f706b90, 0x3f1ddcee,
0x3e177d4c, 0x3f007599, 0x3f688e5c, 0x3ed511c8, 0x3f50aea5, 0x3f5ecf83,
0x3ecec730, 0x3f0cd0ec, 0x3f350747, 0x3d868bf0, 0x3f1e4e4d, 0x3eea3c4e,
0x3f312869, 0x3c9ba7c0, 0x3dad46c0, 0x3ef00a16, 0x3de16750, 0x3e08b354,
0x3eaf7e2e, 0x3f397d10, 0x3f16e18d, 0x3efe6cf6, 0x3f2eae06, 0x3e7bfd6c,
0x3f2e1b63, 0x3ed913c8, 0x3dae08e0, 0x3ee364c8, 0x3f6be96e, 0x3d0dca70,
0x3e1a2144, 0x3ec8ed30, 0x3f334e30, 0x3f352007, 0x3e627a74, 0x3d0649a0,
0x3f4a2e92, 0x3f5a6628, 0x3e0a67f4, 0x3d2e1fc0, 0x3f542fc6, 0x3e1bfb0c,
0x3f37a39a, 0x3f14a316, 0x3e113104, 0x3ee18d5e, 0x3e26e2b0, 0x3eea393c,
0x3efd0ec4, 0x3e8c7e2e, 0x3ea0ecc4, 0x3ec17da6, 0x3ef03b0a, 0x3e96d13c,
0x3f69ecc8, 0x3ebdec2a, 0x3eb9d25a, 0x3e3ee23c, 0x3e5b9bd8, 0x3ecd0b00,
0x3e68be50, 0x3f15ab36, 0x3ea3677e, 0x3eab0ca0, 0x3f498457, 0x3eb42d54,
0x3f154000, 0x3f532de5, 0x3f64e49b, 0x3f798f25, 0x3d90fbd0, 0x3f40e88a,
0x3e99584a, 0x3e10f8bc, 0x3f3a556f, 0x3f5fc9b9, 0x3e1f4b0c, 0x3f4f7e3b,
0x3e1347bc, 0x3d569bf0, 0x3f344491, 0x3eedef0c, 0x3e9a7ce2, 0x3eb104e4,
0x3f40699a, 0x3e7dc994, 0x3f4796db, 0x3ec460be, 0x3f023a0b, 0x3f30d2d9,
0x3ea89fb2, 0x3f6eb763, 0x3f02b960, 0x3f22d7d7, 0x3f633559, 0x3f396cf4,
0x3eed5372, 0x3d289f00, 0x3f03d74e, 0x3bde4300, 0x3ea133aa, 0x3e57bf18,
0x3efd71a0, 0x3ec25fce, 0x3f4eb8d8, 0x3f21c0b7, 0x3f24cb93, 0x3ee63240,
0x3e498c78, 0x3f68c5f7, 0x3f233e64, 0x3f48e4a2, 0x3ee9007c, 0x3ed210b4,
0x3d0e9cf0, 0x3e3db310, 0x3f20106f, 0x3ddc15d8, 0x3ddf9fd8, 0x3f172511,
0x3e343374, 0x3f07d4e2, 0x3e8eb102, 0x3f2c11d2, 0x3df2d818, 0x3f1c0428,
0x3f6fcb0b, 0x3e886f24, 0x3e9f92a4, 0x3f39de65, 0x3f4f3400, 0x3ea35fc4,
0x3b975a80, 0x3e944572, 0x3f2fd65e, 0x3eadc1a8, 0x3ebe30c2, 0x3f0c351c,
0x3eca9b2c, 0x3edfe3ae, 0x3f3de46d, 0x3f6e48ee, 0x3f5741c0, 0x3ea13e1e,
0x3db148e8, 0x3e42e970, 0x3dbbcc98, 0x3f33f355, 0x3f46238f, 0x3f333266,
0x3f7a7ccd, 0x3f093f70, 0x3e865db0, 0x3ecf5c38, 0x3f53867b, 0x3f5b34a5,
0x3f7b540b, 0x3d276490, 0x3f610d47, 0x3ed3393c, 0x3f17f9af, 0x3e8d37c2,
0x3dc5dc40, 0x3f1d351b, 0x3f3c54cb, 0x3f38f23d, 0x3f7ab619, 0x3f025fd2,
0x3ed871b0, 0x3f2f71c2, 0x3f2c9067, 0x3f719461, 0x3f580d8b, 0x3e5844e4,
0x3e999bba, 0x3e51dec8, 0x3f77155f, 0x3e9853b8, 0x3d2cb3a0, 0x3eee3a78,
0x3efdc488, 0x3ee7925a, 0x3e6965d4, 0x3eb551b0, 0x3f7f6a87, 0x3f2221ee,
0x3f73cb06, 0x3e6edecc, 0x3d177cc0, 0x3f009ab3, 0x3ec63798, 0x3e9074b2,
0x3ee9d6a2, 0x3ed0921a, 0x3dd0c7a0, 0x3f7abeb9, 0x3ea85eb0, 0x3cce36e0,
0x3f021792, 0x3eed5492, 0x3e9d46ee, 0x3f3069ed, 0x3d70c0f0, 0x3eef809c,
0x3f3fadd7, 0x3d5afe50, 0x3f1b0a26, 0x3f7e0083, 0x3e823f9c, 0x3eac6f80,
0x3e861fe0, 0x3f469033, 0x3e85e8d4, 0x3f58d714, 0x3e73dfc0, 0x3d134840,
0x3f79a410, 0x3ed1d3b0, 0x3f481647, 0x3f697191, 0x3f697a69, 0x3e3f7e5c,
0x3ebb81aa, 0x3ec0ed6e, 0x3f43b6a8, 0x3e8d1448, 0x3f43eeda, 0x3d78d1c0,
0x3e9ada58, 0x3ed481e6, 0x3e17c540, 0x3f2ad303, 0x3ed2de74, 0x3f5078eb,
0x3f068a15, 0x3ee453a4, 0x3f43875a, 0x3f272ed5, 0x3f6f5cf5, 0x3ee4ecbc,
0x3e270294, 0x3f3cb1ae, 0x3e980c9c, 0x3d2cc5b0, 0x3f1db79a, 0x3e748b78,
0x3ea03cf4, 0x3ef516de, 0x3ec8b058, 0x3f048c8f, 0x3f414175, 0x3dd6ace8,
0x3efec682, 0x3ee83e1e, 0x3e4a2bc0, 0x3dff2f70, 0x3f667302, 0x3b134c00,
0x3ea34ffa, 0x3f17b711, 0x3f3c5d23, 0x3e5c0a14, 0x3f5d050b, 0x3f6d4657,
0x3f03590a, 0x3f5d57b0, 0x3f51d579, 0x3f676519, 0x3eda2f96, 0x3f084691,
0x3eff6d00, 0x3cc1f8e0, 0x3f574567, 0x3f6c864d, 0x3e9e7106, 0x3f38d631,
0x3ed3b61e, 0x3eabee16, 0x3f00a1fe, 0x3f20cbe7, 0x3e6132e4, 0x3f015b4e,
0x3f42e35a, 0x3ee43b3a, 0x3dc3f248, 0x3f63d109, 0x3f1625a3, 0x3d012ed0,
0x3e55597c, 0x3f26431d, 0x3f7c5832, 0x3f0a58b4, 0x3ec0ab44, 0x3eaae0dc,
0x3f455cc3, 0x3f1e6ae1, 0x3f285567, 0x3ecbef02, 0x3f293ed2, 0x3ee3d38e,
0x3f74a4b0, 0x3ee37eec, 0x3ea597fc, 0x3f389e84, 0x3de48e80, 0x3b5ff100,
0x3f1f5907, 0x3f35c290, 0x3ef6e438, 0x3eff11c4, 0x3ee3e418, 0x3f7eaab3,
0x3e8b5a02, 0x3f79f6c1, 0x3f002e84, 0x3f196ec6, 0x3e34b598, 0x3ee25420,
0x3e1da0c4, 0x3d8b0de8, 0x3d85f3e0, 0x3ef205f2, 0x3eb61442, 0x3f3113b1,
0x3e15dd68, 0x3f3e49f3, 0x3e9e4766, 0x3cf98420, 0x3f734957, 0x3f766464,
0x3e9e6f48, 0x3f704f9d, 0x3ed5b958, 0x3ed37c72, 0x3f5fb886, 0x3ea368c6,
0x3f6d92b7, 0x3d9c2f30, 0x3f659ed0, 0x3f46f020, 0x3dd38b30, 0x3ef904e2,
0x3f4aa328, 0x3f021c29, 0x3f58ac51, 0x3f345424, 0x3ea74316, 0x3f446948,
0x3e009768, 0x3f314677, 0x3e0b7478, 0x3e43a70c, 0x3f77e8cd, 0x3e671bc0,
0x3f480976, 0x3ec9ace4, 0x3f5b96cd, 0x3eaa08d4, 0x3f2fa4c2, 0x3f54c6f5,
0x3f38dc93, 0x3eee5ef4, 0x3f068253, 0x3e91375e, 0x3ec4324a, 0x3f606da6,
0x3f788d37, 0x3f1912f2, 0x3f1ec911, 0x3edf74ee, 0x3f1cf1ac, 0x3f48fe58,
0x3f7fc5a0, 0x3f7dfc14, 0x3f5b451b, 0x3ea08046, 0x3f7009fb, 0x3f755397,
0x3f2b7aa6, 0x3effe866, 0x3e90a61c, 0x3f653cfc, 0x3e770510, 0x3ee80a80,
0x3e32d938, 0x3f47d81d, 0x3e3347e0, 0x3d98acf8, 0x3f1ffe86, 0x3de5fdd8,
0x3f17bfb3, 0x3f257744, 0x3d4a2960, 0x3e5903b0, 0x3f479656, 0x3f6bc03c,
0x3e95d8ea, 0x3e84b880, 0x3da394b0, 0x3f6a065f, 0x3f3033f8, 0x3f403973,
0x3f6b9ee5, 0x3dfe4278, 0x3f636808, 0x3f671d51, 0x3ee8fbac, 0x3f2afd1f,
0x3da4c250, 0x3f154d38, 0x3dc6da18, 0x3f0b14b9, 0x3f7b804f, 0x3f1a44a2,
0x3f585b1d, 0x3f66e71d, 0x3e1de574, 0x3f5ee053, 0x3f18eea8, 0x3d27a9c0,
0x3e8a6136, 0x3d59a660, 0x3f01db0f, 0x3e93354c, 0x3e0f1f7c, 0x3ebb7906,
0x3f30338b, 0x3e42accc, 0x3c32fd80, 0x3c9ab440, 0x3f00c421, 0x3f1dbece,
0x3f7cfdf9, 0x3f455695, 0x3f6720ba, 0x3ecffdea, 0x3f0b1e52, 0x3f66ad8c,
0x3e5d7c88, 0x3efdaefe, 0x3f1951b3, 0x3f56efd9, 0x3f376aad, 0x3ed542ea,
0x3e8a1efc, 0x3f720f76, 0x3e9519d8, 0x3e685028, 0x3da56500, 0x3f2b4fd1,
0x3e4e166c, 0x3f2d7003, 0x3f64d5f4, 0x3e5f26a8, 0x3ea02bb8, 0x3f278ce4,
0x3e660294, 0x3f0ba669, 0x3f35657f, 0x3e37a1bc, 0x3dc901c0, 0x3f1a8587,
0x3f54a350, 0x3d0bd2c0, 0x3f315e90, 0x3f141915, 0x3ec58e3a, 0x3f356c46,
0x3e5c098c, 0x3f176fc8, 0x3f6972a7, 0x3f59a44e, 0x3f43f815, 0x3f5ff729,
0x3e1158ec, 0x3f564eea, 0x3f6fef0b, 0x3d7aede0, 0x3e796be4, 0x3e2ec3dc,
0x3f6d9d4a, 0x3f6f2bc9, 0x3e641b0c, 0x3e9576ce, 0x3f3c03ec, 0x3f454a1d,
0x3f05c20b, 0x3f4cc550, 0x3eb81f36, 0x3f615407, 0x3eead1e2, 0x3f4b7b10,
0x3ecaaa80, 0x3f36b6fb, 0x3f0a6b8c, 0x3f216b9d, 0x39e1e000, 0x3d852408,
0x3e3024c0, 0x3e7ed480, 0x3e95077a, 0x3e53a9ec, 0x3f746732, 0x3f0cd57f,
0x3f3dced4, 0x3f6ca63a, 0x3f16e067, 0x3efa76c0, 0x3f34468a, 0x3f1652eb,
0x3f401718, 0x3f00b6bf, 0x3f67416b, 0x3eed6a88, 0x3d7f04b0, 0x3e0941c8,
};
// 2,3,5,5
uint32_t kernel_vals[] = {
0x3dc1ef2c, 0xbd4afe40, 0xbd9592c5, 0xbe1fade9, 0x3d6c2508, 0xbdd2c720,
0x3c195110, 0x3e39d55a, 0xbe19fe5a, 0xbd4f7cb8, 0xbdd860aa, 0xbd9f8af7,
0xbe15e820, 0x3bc06080, 0x3dd8b200, 0x3d98e864, 0xbdf9352c, 0x3d6e6124,
0x3e1ce872, 0xbddbdef8, 0xbdd473e1, 0xbe33b833, 0x3da421b8, 0xbe2d2c6b,
0x3e3aa2e4, 0x3de87114, 0xbd1046d8, 0x3d5b0694, 0x3dae7864, 0x3d990464,
0xbdd1cee5, 0x3db34934, 0x3dbce7a4, 0x3dc31f58, 0xbd8b648c, 0x3e107fd2,
0x3d86b7ce, 0xbd57180c, 0xbd6735f0, 0xbc94cfe0, 0xbe0ded40, 0xbd85f78e,
0x3dc8b7ec, 0xbca29f38, 0xbdbdcb4e, 0xbcea63b0, 0xbe35a862, 0x3e3869a6,
0x3e03dc16, 0x3e369824, 0xbd82ddc2, 0x3c8f2268, 0x3c086970, 0xbdd3ca16,
0xbd5cfa1c, 0xbe0fdbe9, 0x3db979b0, 0xbdf6cec4, 0xbc986630, 0x3d3c4390,
0x3def1cd0, 0xbe04c359, 0xbe379160, 0xbd2299c0, 0x3d639694, 0x3e32eeae,
0x3d234c44, 0x3cd05b90, 0xbe1ca8af, 0x3e2bd088, 0x3e1ebe9a, 0xbd17b4c0,
0x3dd7029c, 0x3dbb63b0, 0xbe0cffec, 0x3db369c8, 0x3e2ce704, 0xbe2894ee,
0x3db3e32c, 0x3d7a42f8, 0x3dfb04b8, 0xbe2addc3, 0x3e1c07c8, 0x3e0fe1ac,
0x3d849fca, 0x3c5e5ae0, 0x3d8afe3c, 0xbd91a2ca, 0x3cc6c5c0, 0xbd345d88,
0x3e0df82e, 0xbe0e8518, 0xbdfa2da2, 0xbdca4ed7, 0xbd493a3c, 0x3bf73200,
0x3dfb6e40, 0xbca86c80, 0x3ddd534c, 0xbe178f40, 0x3cabdc18, 0x3c2027b0,
0x3e1b0c4a, 0xbe03fa6d, 0xbd5728b0, 0x3e04e9ae, 0xbd05d144, 0x3c20c3f0,
0x3de76758, 0xbdb6aeca, 0xbdb0897a, 0x3e1eaf24, 0x3d215434, 0x3db95a08,
0x3d830e56, 0xbd884998, 0x3db9c368, 0xbe10f35d, 0x3e256c04, 0xba02ad00,
0xbe0437fa, 0xbe30f32c, 0x3d831984, 0x3e1e4382, 0xbe0db618, 0xbd3297e4,
0xbde6de41, 0x3dc95498, 0xbdb933e1, 0x3de72ce0, 0xbcaa3940, 0xbe0aef70,
0x3db150b0, 0x3dcee6c0, 0x3c843678, 0x3e2bd24c, 0x3c4431b0, 0xbe12ca69,
0xbe02fabe, 0xbe076e08, 0x3dd23240, 0xbe29bb4c, 0xbd66b750, 0x3e2af9f0,
0xbdb5289e, 0xbd79f286, 0x3e1b6538, 0xbe376a62, 0xbdc9851c, 0xba66d200,
};
// 5
uint32_t bias_vals[] = {
0xbd3e9f8c, 0xbdf4be10, 0xbd861c9e, 0x3d0d4978, 0xbdc35d93,
};
// 3,5,2,5
uint32_t output_exp_vals[] = {
0x3e3eb29f, 0xbf2431cf, 0xbd3c2cb9, 0x3ec9941a, 0xbe019504, 0x3ed55520,
0xbf268f78, 0xbeb25c44, 0xbbc1b02b, 0x3d9680fc, 0x3e5a57a9, 0xbf0861e9,
0xbe980c59, 0x3ec9af1f, 0x3e057cc4, 0x3c397765, 0xbec5d321, 0xbe7106e9,
0x3e6b25bf, 0xbe162702, 0x3d285bae, 0xbee24ed1, 0xbe5f61c6, 0x3e6d9bf1,
0xbe21e5ce, 0x3e2257db, 0xbeae2fd2, 0x3d2453d3, 0x3ec62915, 0xbee4c828,
0x3e58d564, 0xbf232884, 0x3e1c46c1, 0x3e3baa73, 0xbded1d5b, 0x3e09ce53,
0xbeebf081, 0x3dae4215, 0x3f12fb01, 0xbd9626b6, 0x3de73a84, 0xbec044c7,
0x3e517516, 0x3e892149, 0xbd851637, 0x3eb5d71c, 0xbf076e93, 0x3c6657e7,
0x3d81414f, 0xbe3d5ca7, 0x3c279fa2, 0xbf1065b3, 0xbe00250d, 0x3efef655,
0xbe071035, 0x3e3a4085, 0xbf0e87c2, 0x3db127ea, 0x3e4d846b, 0x3e1ecce4,
0x3e3107ba, 0xbf23824b, 0x3e269cf3, 0x3e95ad70, 0xbe098bb7, 0x3dec94ee,
0xbe65fde6, 0x3db798b1, 0x3db34030, 0xbe7429ad, 0xbbee7768, 0xbf19b3e7,
0xbe166291, 0x3d948b67, 0xbe4f7ce2, 0x3e108942, 0xbeea1e35, 0xbe3bb4b7,
0x3e84725a, 0xbea914ea, 0x3e041cec, 0xbed55994, 0x3e31f984, 0xbd3233e6,
0xbe1658bb, 0x3e09a5ee, 0xbee9f260, 0xbde464c0, 0x3de69ade, 0xbd79033a,
0x3d9a3ba7, 0xbef017f5, 0x3df0f795, 0x3d436760, 0xbcb32c93, 0x3ec1bc0a,
0xbec3a50b, 0xbe3b3a62, 0x3e0529ee, 0xbe5c712e, 0x3eab528d, 0xbe7880c5,
0xbe7cc267, 0x3e9789ef, 0xbe1f75ee, 0x3dfd3bff, 0xbf0529de, 0xbdb2af09,
0x3ebbc0cb, 0xbe1f469c, 0x3e29a963, 0xbe893841, 0xbec59110, 0x3e03a680,
0xbe0fe3fc, 0x3e4ab203, 0xbf1040a7, 0xbd4d9423, 0x3de26c26, 0xbd11f7e5,
0x3ec20296, 0xbf17732c, 0x3e541b46, 0x3eb4273c, 0xbe80ea72, 0x3e6f3363,
0xbf060cb4, 0xbe42defb, 0x3e723a44, 0x3d6ffd66, 0x3e162cfb, 0xbf6b8da8,
0x3d56a599, 0x3eb102a0, 0xbe420cb5, 0x3eb0a5be, 0xbef142c6, 0x3d937133,
0x3ea7bcdf, 0xbeb35b92, 0x3d8e0431, 0xbed46437, 0x3e651ef5, 0x3eae0d1e,
0xbdd41583, 0x3e4231b1, 0xbe3b758c, 0xbe89c7f5, 0xbd5ab5c6, 0xbe467e60,
};
// 3,5,2,5
uint32_t output_relu_exp_vals[] = {
0x3e3eb29f, 0x0, 0x0, 0x3ec9941a, 0x0, 0x3ed55520,
0x0, 0x0, 0x0, 0x3d9680fc, 0x3e5a57a9, 0x0,
0x0, 0x3ec9af1f, 0x3e057cc4, 0x3c397765, 0x0, 0x0,
0x3e6b25bf, 0x0, 0x3d285bae, 0x0, 0x0, 0x3e6d9bf1,
0x0, 0x3e2257db, 0x0, 0x3d2453d3, 0x3ec62915, 0x0,
0x3e58d564, 0x0, 0x3e1c46c1, 0x3e3baa73, 0x0, 0x3e09ce53,
0x0, 0x3dae4215, 0x3f12fb01, 0x0, 0x3de73a84, 0x0,
0x3e517516, 0x3e892149, 0x0, 0x3eb5d71c, 0x0, 0x3c6657e7,
0x3d81414f, 0x0, 0x3c279fa2, 0x0, 0x0, 0x3efef655,
0x0, 0x3e3a4085, 0x0, 0x3db127ea, 0x3e4d846b, 0x3e1ecce4,
0x3e3107ba, 0x0, 0x3e269cf3, 0x3e95ad70, 0x0, 0x3dec94ee,
0x0, 0x3db798b1, 0x3db34030, 0x0, 0x0, 0x0,
0x0, 0x3d948b67, 0x0, 0x3e108942, 0x0, 0x0,
0x3e84725a, 0x0, 0x3e041cec, 0x0, 0x3e31f984, 0x0,
0x0, 0x3e09a5ee, 0x0, 0x0, 0x3de69ade, 0x0,
0x3d9a3ba7, 0x0, 0x3df0f795, 0x3d436760, 0x0, 0x3ec1bc0a,
0x0, 0x0, 0x3e0529ee, 0x0, 0x3eab528d, 0x0,
0x0, 0x3e9789ef, 0x0, 0x3dfd3bff, 0x0, 0x0,
0x3ebbc0cb, 0x0, 0x3e29a963, 0x0, 0x0, 0x3e03a680,
0x0, 0x3e4ab203, 0x0, 0x0, 0x3de26c26, 0x0,
0x3ec20296, 0x0, 0x3e541b46, 0x3eb4273c, 0x0, 0x3e6f3363,
0x0, 0x0, 0x3e723a44, 0x3d6ffd66, 0x3e162cfb, 0x0,
0x3d56a599, 0x3eb102a0, 0x0, 0x3eb0a5be, 0x0, 0x3d937133,
0x3ea7bcdf, 0x0, 0x3d8e0431, 0x0, 0x3e651ef5, 0x3eae0d1e,
0x0, 0x3e4231b1, 0x0, 0x0, 0x0, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
void test_valid_padding_zero_strides_medium() {
input_set *set = &medium_input;
strides_input_set *strides = &zero_strides;
// 3,10,8,5
uint32_t input_vals[] = {
0x3f36b631, 0x3b9ca600, 0x3f76ac7f, 0x3d3f3570, 0x3e70e1b4, 0x3e3830f8,
0x3f3f9678, 0x3e1fb9dc, 0x3dd55dd0, 0x3aef4000, 0x3f433347, 0x3df08b90,
0x3eb33e84, 0x3dc553c8, 0x3ee7a2f2, 0x3e377d78, 0x3f3e7fdb, 0x3f1236d7,
0x3ea73b60, 0x3e0cc410, 0x3f57cffe, 0x3f1253af, 0x3eeea1d6, 0x3f116d9b,
0x3de6adb0, 0x3f335bd0, 0x3f459139, 0x3f221418, 0x3e089dac, 0x3f208980,
0x3ec148ac, 0x3dfe44a8, 0x3f434e54, 0x3f207099, 0x3f6d610b, 0x3f4929fb,
0x3f6fef6c, 0x3e96ad46, 0x3f4d17d4, 0x3e9b4166, 0x3eabb14a, 0x3b020c00,
0x3f09f77a, 0x3f68586f, 0x3ef184c6, 0x3f1b4509, 0x3df6bf78, 0x3f73cdae,
0x3f71564c, 0x3e0d0f10, 0x3f25afae, 0x3f4cc291, 0x3e5e4880, 0x3f544618,
0x3f165f1a, 0x3f0d128c, 0x3f0459a7, 0x3e24cc88, 0x3eb49e0a, 0x3f247e0f,
0x3f19b458, 0x3e607e7c, 0x3deeb150, 0x3ee86844, 0x3ed8b3e2, 0x3efaeafa,
0x3eca4cde, 0x3f4c835c, 0x3f71404c, 0x3f6162dc, 0x3eb60aea, 0x3d6169d0,
0x3d298250, 0x3f3facbb, 0x3dc7db50, 0x3f476996, 0x3d84cdb8, 0x3e8381c8,
0x3ebc1c56, 0x3edda720, 0x3f327cb8, 0x3ec91382, 0x3f567352, 0x3f60a3cf,
0x3e63cdd0, 0x3f2fbe4e, 0x3f61a525, 0x3e4d10dc, 0x3d8119d8, 0x3f706446,
0x3f566a2d, 0x3f263a00, 0x3f68176a, 0x3f6d8d15, 0x3f6c6af8, 0x3e741c84,
0x3f182317, 0x3e885446, 0x3f479de4, 0x3f7de85f, 0x3ec4857c, 0x3ebc271a,
0x3f143fe7, 0x3e0877c4, 0x3f75c402, 0x3ef8b408, 0x3f748088, 0x3f10c163,
0x3dbccff0, 0x3f32b1fc, 0x3f674b45, 0x3f0139d4, 0x3ef9b882, 0x3f64196c,
0x3defe888, 0x3dd093e8, 0x3f2b3835, 0x3f4fb95d, 0x3f7e460c, 0x3e24b3c8,
0x3e5a5130, 0x3f0208d2, 0x3f6cb694, 0x3f0b2b70, 0x3f1eaf41, 0x3ec30552,
0x3f13f950, 0x3e405394, 0x3e9bf8c4, 0x3d5b8a00, 0x3d954c60, 0x3f2aaf0d,
0x3f47c963, 0x3f48e285, 0x3f5108e1, 0x3dd6f800, 0x3f623988, 0x3f22332e,
0x3f39bf14, 0x3f37a5a6, 0x3f57aec5, 0x3f49ad86, 0x3efb3ac4, 0x3ee00cf8,
0x3f1e8a44, 0x3f05f896, 0x3f10619b, 0x3e1a016c, 0x3f667a9f, 0x3d823af0,
0x3dc83748, 0x3f332ca5, 0x3f25f83e, 0x3f52f762, 0x3f5d1d04, 0x3ed5bac2,
0x3eb30346, 0x3f5ce83e, 0x3f1fe917, 0x3ee4e7f6, 0x3ef79562, 0x3f577e28,
0x3d910f88, 0x3f427613, 0x3e94929e, 0x3ea4fce6, 0x3f23ee9a, 0x3d1c8fb0,
0x3d01a550, 0x3f42d47f, 0x3da77a08, 0x3f60995e, 0x3c0a9380, 0x3e88165e,
0x3f0ad115, 0x3f45a98a, 0x3ed80798, 0x3f1d936a, 0x3f0e07fa, 0x3e8e1ef2,
0x3ee0618c, 0x3dbbedc8, 0x3eb6713c, 0x3f22cf06, 0x3f1df573, 0x3f7cf87a,
0x3f19492d, 0x3eb07ab6, 0x3edd0088, 0x3e2a9198, 0x3defc080, 0x3f340f50,
0x3f773033, 0x3f3054c9, 0x3f76f70a, 0x3e66eb1c, 0x3f676d3a, 0x3ebeb408,
0x3f26fe18, 0x3f703110, 0x3ef2b336, 0x3f319c97, 0x3e3adb10, 0x3d1b3af0,
0x3f3b0802, 0x3e1b7498, 0x3f2f4afe, 0x3f0b8386, 0x3f285fa0, 0x3f43b72e,
0x3f5d486b, 0x3f65dcf2, 0x3e66fba4, 0x3eeb2f94, 0x3b42bd00, 0x3f62fc54,
0x3ea805d4, 0x3eb4fe52, 0x3e1fab68, 0x3e6362b8, 0x3f73f5a9, 0x3f7103c8,
0x3f439d03, 0x3f0451db, 0x3c9a8dc0, 0x3e3ccb48, 0x3f419ff7, 0x3f1d8aa7,
0x3ec10a20, 0x3e752480, 0x3f5b1273, 0x3f557bf6, 0x3e20a7b0, 0x3e701b88,
0x3daf0738, 0x3f58dfea, 0x3f5b49d0, 0x3f79dc64, 0x3d35c960, 0x3f5e3797,
0x3e9a238c, 0x3dbb67a8, 0x3f56999b, 0x3f6b147d, 0x3f450ebd, 0x3f0e0ba6,
0x3f42ab7d, 0x3e22fa34, 0x3e491070, 0x3f4b6181, 0x3f3cfea2, 0x3f773612,
0x3f37c12c, 0x3f11311c, 0x3e8e5a48, 0x3f5b9871, 0x3e184f28, 0x3f349900,
0x3f668a71, 0x3f618f59, 0x3d072ca0, 0x3eea0f1e, 0x3e109d28, 0x3edfbf9c,
0x3edb0210, 0x3f7f6eff, 0x3e478798, 0x3e182b74, 0x3e306d94, 0x3ebdac5e,
0x3ebfa2ce, 0x3ec22efa, 0x3db01a70, 0x3c854b20, 0x3f44eb5b, 0x3f342728,
0x3f131358, 0x3f390480, 0x3f154485, 0x3f54c400, 0x3e4200a0, 0x3f4477e4,
0x3f164d00, 0x3e7f1908, 0x3e10028c, 0x3debfcd8, 0x3eb5adde, 0x3f361860,
0x3f0955a3, 0x3f79dc1a, 0x3e254178, 0x3cd96800, 0x3e8f8ee8, 0x3f28e2cb,
0x3ee6297c, 0x3f4b6ffb, 0x3f451d83, 0x3f603f0b, 0x3eb77766, 0x3f22176f,
0x3e81b948, 0x3f47cfda, 0x3f10581d, 0x3f56ca4d, 0x3b0bb400, 0x3f4a6862,
0x3ed19728, 0x3dceec28, 0x3edeafc6, 0x3f0e3fea, 0x3f26b23b, 0x3f6d93ef,
0x3ebf908a, 0x3e99d570, 0x3e814d2c, 0x3f32ea59, 0x3f62acaf, 0x3f4152b9,
0x3e2d777c, 0x3e3f6a7c, 0x3ec70f36, 0x3ec7937c, 0x3f1c251e, 0x3f26d5de,
0x3e951d02, 0x3eaf0570, 0x3f04f161, 0x3ed8d624, 0x3e437414, 0x3efeceba,
0x3e8838fe, 0x3f168c18, 0x3e1a49a8, 0x3f1ae81d, 0x3eb71206, 0x3e61cee8,
0x3ecdd5a4, 0x3e418040, 0x3edfb0b6, 0x3e38040c, 0x3f76051f, 0x3ec0ee50,
0x3f6e6777, 0x3f57f7f9, 0x3f5ba562, 0x3ef21660, 0x3e8c7112, 0x3e5ab544,
0x3e827f6a, 0x3f261474, 0x3e9237d6, 0x3e5cd520, 0x3f5ab4cb, 0x3e81e382,
0x3d0e0a40, 0x3d0e87e0, 0x3f3fecc7, 0x3eaa5012, 0x3edeea7a, 0x3f1ad9aa,
0x3f059183, 0x3eb3fbb8, 0x3f014789, 0x3e4747a0, 0x3f1f716e, 0x3b714000,
0x3e852fde, 0x3c56b680, 0x3f30cc9a, 0x3f04b414, 0x3efcfc4a, 0x3f70c129,
0x3eacc898, 0x3ea23a38, 0x3f1f14e9, 0x3f50d1f7, 0x3f147713, 0x3f6be6c0,
0x3f3483e7, 0x3eea910a, 0x3e591a9c, 0x3f5c5ad6, 0x3d514e70, 0x3f4a49a7,
0x3f37fad8, 0x3f46540c, 0x3f74a543, 0x3f40679c, 0x3f4abe61, 0x3e178ffc,
0x3eae56a8, 0x3edbefee, 0x3da237b0, 0x3ed100d2, 0x3e92d8a6, 0x3dec6e58,
0x3f1317ce, 0x3f12ab4d, 0x3f45daaf, 0x3f5fe2b6, 0x3e42cb94, 0x3f7a260c,
0x3f0d94d4, 0x3e2f4678, 0x3f5da986, 0x3f3ecd03, 0x3f6e32c6, 0x3eb08d0e,
0x3ea099be, 0x3e11011c, 0x3f766f53, 0x3e6611c4, 0x3f5d2b28, 0x3ee70ede,
0x3f658ee8, 0x3d2f4970, 0x3f1d4a31, 0x3d9109c0, 0x3ce695e0, 0x3f7f63bf,
0x3e8fd860, 0x3e3e820c, 0x3f0d6155, 0x3f032f81, 0x3f10d55d, 0x3f6d3b5e,
0x3f02d3a0, 0x3f56b825, 0x3f146fa0, 0x3de117f0, 0x3ef0853c, 0x3f6a0c73,
0x3de4ad70, 0x3e9a66fc, 0x3f18cfc5, 0x3f1944d9, 0x3f3f06b1, 0x3ef8a9a6,
0x3f298468, 0x3ecc4c4c, 0x3f123787, 0x3d1cee30, 0x3f3b25da, 0x3f6fd971,
0x3f5c07c3, 0x3f34bfaa, 0x3e91d31a, 0x3f33a6b7, 0x3ed4457e, 0x3ca0a0e0,
0x3e968518, 0x3da00258, 0x3f0f0d86, 0x3e6f51dc, 0x3f20c172, 0x3f109e04,
0x3f3c4c11, 0x3f33be00, 0x3d69d2d0, 0x3efcff56, 0x3f5730f0, 0x3edf8088,
0x3e1b1620, 0x3e0a22dc, 0x3f0a1397, 0x3eea6736, 0x3e09b75c, 0x3f6a9bfc,
0x3e64d1b8, 0x3f580cd8, 0x3d290cc0, 0x3eae325e, 0x3f02dad5, 0x3e057dd8,
0x3f024ff2, 0x3f2c3e6f, 0x3f2a014a, 0x3f52bd7b, 0x3e9b5a80, 0x3dc798e8,
0x3ebea594, 0x3f0bd9a6, 0x3f1c50ef, 0x3f468365, 0x3e5554cc, 0x3ec9fdf8,
0x3f781a74, 0x3ee3854a, 0x3f280dd5, 0x3e997f22, 0x3f42c1ab, 0x3efc99ea,
0x3f36d3c3, 0x3f735e96, 0x3f0da6b8, 0x3f166eb5, 0x3f284366, 0x3f7982ae,
0x3f7f159b, 0x3f31652b, 0x3e4a43b8, 0x3eb27360, 0x3f7e42e8, 0x3ea3d00a,
0x3ed03742, 0x3e89dd14, 0x3f0f4624, 0x3da20d48, 0x3e9bbeda, 0x3e9af660,
0x3ed915e6, 0x3f0495dd, 0x3f07c35c, 0x3f118423, 0x3f382ace, 0x3ec0c056,
0x3f6df319, 0x3e1030fc, 0x3dd09b68, 0x3c62da80, 0x3ec9ce0c, 0x3f35a223,
0x3f4a9439, 0x3f459f23, 0x3f5971aa, 0x3e220b1c, 0x3e5c371c, 0x3f5be3f7,
0x3ed1ecf8, 0x3ee23da0, 0x3f5cba00, 0x3d643c00, 0x3e187990, 0x3da69340,
0x3f5910ef, 0x3f426233, 0x3cc6e800, 0x3ef1344e, 0x3dfae618, 0x3f60b7f5,
0x3f1942a2, 0x3ea30810, 0x3f5892c1, 0x3e1f3270, 0x3f0dac81, 0x3f1cfb60,
0x3f108bc3, 0x3f54a5b3, 0x3f232e5b, 0x3f287d6a, 0x3f0efc17, 0x3f786b06,
0x3dfe15e8, 0x3f621efb, 0x3e5e68d4, 0x3f5fb5c0, 0x3ee6d8ca, 0x3f55f16d,
0x3f656889, 0x3f5bb5c0, 0x3f1cf6c7, 0x3f320bfc, 0x3ed2ad8a, 0x3eb98256,
0x3f4aeaf3, 0x3f137f9e, 0x3f410d87, 0x3d991dd0, 0x3e569a0c, 0x3ed7cafe,
0x3e668c48, 0x3def9d98, 0x3f197f5b, 0x3e8e60d6, 0x3f31e438, 0x3f2ab320,
0x3db0ee68, 0x3f3ba15f, 0x3e223e2c, 0x3ef1a91e, 0x3d040020, 0x3f308052,
0x3f4cc3a8, 0x3ebdff66, 0x3f640150, 0x3f285ea6, 0x3f3ba978, 0x3e23e124,
0x3d465d60, 0x3f34b0c8, 0x3d886860, 0x3e88cc22, 0x3e0d2ad4, 0x3f446a6a,
0x3f5d5169, 0x3e960c80, 0x3f140f03, 0x3e27fde0, 0x3cf0dd40, 0x3d3a6940,
0x3f43937e, 0x3c2b1f80, 0x3f488a07, 0x3e418210, 0x3f7cb6bd, 0x3e4f0bf0,
0x3f019d08, 0x3d810b18, 0x3f55e69d, 0x3f2fb0bf, 0x3f65ec48, 0x3f1f602f,
0x3efdd75e, 0x3e0104f0, 0x3f3e3b36, 0x3e651c0c, 0x3f01362b, 0x3eb4c58a,
0x3f1e99ac, 0x3f1c85ab, 0x3f793d8d, 0x3ed75d7a, 0x3f37e2a9, 0x3f5fe002,
0x3f541199, 0x3f27737e, 0x3f354703, 0x3f19231c, 0x3f7e2bde, 0x3f788080,
0x3e910fb4, 0x3eb9a258, 0x3e7d73d8, 0x3f40c445, 0x3eea30f6, 0x3f4e1083,
0x3f5f484b, 0x3f165b7a, 0x3ebb4c5e, 0x3db7c988, 0x3d6a4e60, 0x3d8dffa8,
0x3f75cb55, 0x3f4924cc, 0x3f7589a7, 0x3e3ba718, 0x3e5b64c4, 0x3f15130b,
0x3ef78d22, 0x3eca0304, 0x3e3f3a18, 0x3da92190, 0x3e812406, 0x3eb1109e,
0x3e84898c, 0x3f10d994, 0x3d43c8a0, 0x3f044912, 0x3f006ab8, 0x3f4ecb83,
0x3f0c933a, 0x3f5ee4ab, 0x3d297a30, 0x3f1cb629, 0x3f476f1e, 0x3d8f4010,
0x3ec0a59e, 0x3e3780b8, 0x3f55d398, 0x3f11230d, 0x3f7b83d5, 0x3ddb9ed8,
0x3ed072d2, 0x3f35bad5, 0x3eefee28, 0x3effc15e, 0x3f1a8c66, 0x3e40f244,
0x3f3d1e68, 0x3ecf06e8, 0x3e6e97fc, 0x3df891f0, 0x3f6c646f, 0x3f132603,
0x3f755d4b, 0x3f030eb4, 0x3f069de3, 0x3f18d89b, 0x3ef31d78, 0x3f4dfd0b,
0x3e921c74, 0x3f3cd952, 0x3f632436, 0x3f35bf2e, 0x3f4c3a85, 0x3f23506e,
0x3ec9ddfa, 0x3ec5b4de, 0x3f1ec970, 0x3f519f70, 0x3f2e5652, 0x3f41cd3e,
0x3edc6592, 0x3cb093e0, 0x3e740a40, 0x3eb761fc, 0x3f1a4575, 0x3f204993,
0x3f14b1c6, 0x3f1ab9cd, 0x3f077650, 0x3dca4d18, 0x3eb3e18e, 0x3e8e4ffe,
0x3d8a25f0, 0x3f4c2939, 0x3f193155, 0x3ea8bc8e, 0x3e707ccc, 0x3f4a5d88,
0x3e32faac, 0x3f52839c, 0x3c1bd940, 0x3f318061, 0x3b900d80, 0x3ec6ea42,
0x3ec4a776, 0x3f606dab, 0x3f1a2caf, 0x399c3000, 0x3eda3bd4, 0x3f0c18e1,
0x3f73d7fe, 0x3f15ad0b, 0x3bbcb600, 0x3e2d4114, 0x3f105459, 0x3ba1d600,
0x3e868ece, 0x3f4eeef6, 0x3f0a6cdd, 0x3f0b01ca, 0x3eab4cdc, 0x3f3b253e,
0x3f0d522a, 0x3f04b44f, 0x3f214601, 0x3df97570, 0x3f781e88, 0x3f004a84,
0x3dc4e2c8, 0x3cae3f40, 0x3e981544, 0x3f0fa8be, 0x3f145be8, 0x3f1a41ed,
0x3f611d95, 0x3f6172a1, 0x3f349c83, 0x3e9f1dd2, 0x3f7a2bf5, 0x3f37d399,
0x3f44784a, 0x3d7b3b70, 0x3eb2431e, 0x3f441518, 0x3f0fce8a, 0x3eec22ee,
0x3f4c186b, 0x3e271ef0, 0x3e6a4590, 0x3f174a78, 0x3f36ab45, 0x3f2c7736,
0x3ec17b00, 0x3f752abc, 0x3dc62d48, 0x3f2639dc, 0x3f39aae7, 0x3e44f29c,
0x3e200de4, 0x3eb6e81a, 0x3f45e72e, 0x3f3560a9, 0x3f035e25, 0x3e7bad00,
0x3ef4dd66, 0x3dc91d00, 0x3ebcaa6c, 0x3f649206, 0x3f3875bd, 0x3f1e6d42,
0x3f2790d3, 0x3ef0a232, 0x3db17798, 0x3f65d44d, 0x3e672348, 0x3cd8b4e0,
0x3f7bcf6d, 0x3f3ef25d, 0x3e032ce8, 0x3e938888, 0x3ec79684, 0x3f2f7936,
0x3dda4f60, 0x3e95eede, 0x3f349424, 0x3d9caff0, 0x3f1c2c7e, 0x3f3398e5,
0x3f190799, 0x3e05dd44, 0x3edc72e0, 0x3f3675c1, 0x3f5e5a3c, 0x3f16064c,
0x3f662418, 0x3f3f4247, 0x3f0f3b48, 0x3f25daa2, 0x3f68f840, 0x3f6ffad9,
0x3c630280, 0x3f2b903a, 0x3f2c223d, 0x3effe77c, 0x3f2e2f6b, 0x3eecd0da,
0x3f342867, 0x3f5363b6, 0x3e9e7f68, 0x3f63ad4c, 0x3f3414bf, 0x3e6bcdb0,
0x3f578426, 0x3e962064, 0x3f4db122, 0x3e9c3af6, 0x3ef24892, 0x3f4c6ed5,
0x3f03c0b1, 0x3e9e3aaa, 0x3f0ba870, 0x3f134a31, 0x3f4793dc, 0x3e001670,
0x3d87d9c0, 0x3e9baaf2, 0x3ed92222, 0x3f22a2b0, 0x3ddd1fd0, 0x3f10d7f2,
0x3e288014, 0x3da30988, 0x3f4211a2, 0x3e92778e, 0x3f505e3f, 0x3f643df4,
0x3f0c2459, 0x3e20aef4, 0x3f0349c1, 0x3ee5cf92, 0x3e1474a0, 0x3f4e39d7,
0x3e349b04, 0x3e2639b0, 0x3f0ac524, 0x3efcffac, 0x3e6e2ee4, 0x3f226cb6,
0x3e5bbbc0, 0x3f5edf6b, 0x3f2aa1b4, 0x3f7dabe8, 0x3f13eb6d, 0x3f5b6432,
0x3f34d9a9, 0x3e61deac, 0x3f1d30f9, 0x3d778ee0, 0x3f764987, 0x3f5fb106,
0x3f3dcdc1, 0x3f367073, 0x3e0b0244, 0x3eafcfd0, 0x3f12cb91, 0x3dad9c30,
0x3e9d2594, 0x3dd0d1a0, 0x3d9178d0, 0x3f0e591f, 0x3f7a3f1c, 0x3dba5ad0,
0x3f65a922, 0x3e9dfd50, 0x3ec2c3f4, 0x3f390185, 0x3ea0ba1a, 0x3f53afab,
0x3f33a93d, 0x3ed4cce4, 0x3edbc1d0, 0x3f03e91f, 0x3e728234, 0x3f298ab0,
0x3e6fe3bc, 0x3dac3250, 0x3e8b58f8, 0x3f135011, 0x3eb0a3dc, 0x3f5593ec,
0x3f03f287, 0x3f71fe15, 0x3ec51e3e, 0x3d744960, 0x3f74be06, 0x3f2d671d,
0x3e384d84, 0x3f6f4615, 0x3eb3061c, 0x3f2f80b2, 0x3e69ed90, 0x3e2727b4,
0x3d410e20, 0x3ec6c27e, 0x3f03a310, 0x3edfc2a4, 0x3ea53202, 0x3e0a79b4,
0x3f49b5df, 0x3eab7e3c, 0x3ae05e00, 0x3f72d5e0, 0x3dab7408, 0x3e810eaa,
0x3bcf8e00, 0x391bc000, 0x3e8af156, 0x3f4a7e45, 0x3f4cd234, 0x3ed098fe,
0x3ee4327e, 0x3f766ab7, 0x3e80075e, 0x3f11c187, 0x3e14872c, 0x3f6b1a7b,
0x3f3d8346, 0x3f4c3cf2, 0x3ed51848, 0x3e060280, 0x3ef3f384, 0x3ed91a44,
0x3f2b44a6, 0x3d5edd00, 0x3ecd4ecc, 0x3f282129, 0x3f4233de, 0x3e82de7e,
0x3f7e5ec6, 0x3ecda3b2, 0x3e99ac3a, 0x3f3cda2e, 0x3ebb093e, 0x3ed259d4,
0x3f19d226, 0x3eac68c0, 0x3e7b6568, 0x3e9866b8, 0x3f508994, 0x3e0ce474,
0x3ee3dc62, 0x3ecb4bcc, 0x3f387866, 0x3e772898, 0x3f5bfd02, 0x3e7d2af0,
0x3f7bfbe4, 0x3e9955f4, 0x3c566100, 0x3d376530, 0x3f4e5334, 0x3f496291,
0x3f2b7072, 0x3dcfc480, 0x3e86c298, 0x3f4fd219, 0x3e82f6e0, 0x3f0924f0,
0x3e799b88, 0x3e9ecf24, 0x3de3f120, 0x3ebb2a18, 0x3dce3a20, 0x3eae78de,
0x3f07e440, 0x3f274293, 0x3f47be34, 0x3e3e2694, 0x3edbd970, 0x3f34e9a9,
0x3e963ec0, 0x3f40fb2f, 0x3e1cfe1c, 0x3f6ac0fb, 0x3ebc892c, 0x3e5993d4,
0x3f0a2574, 0x3f431243, 0x3d1f36f0, 0x3f13fa11, 0x3ee0c4b6, 0x3e368e9c,
0x3f1bbc36, 0x3f3fe30e, 0x3ec36554, 0x3eb09934, 0x3e065dc0, 0x3ef9902a,
0x3f6bb3ef, 0x3e9b5008, 0x3e5c78e0, 0x3ec9f760, 0x3e4f2254, 0x3f00d2e7,
0x3edc3108, 0x3f12dd38, 0x3f2f0fb7, 0x3b969c80, 0x3e40b1fc, 0x3f19c592,
0x3f490f1c, 0x3ee4b532, 0x3ea567b4, 0x3f04180f, 0x3c6ab140, 0x3ed87f5a,
0x3da7da00, 0x3df04a10, 0x3f1e5dd8, 0x3e1ba038, 0x3eee17ae, 0x3ec14278,
0x3b2edc00, 0x3f38a28e, 0x3f64000a, 0x3f09a1d2, 0x3f7fc3e6, 0x3cbd65e0,
0x3eb22f14, 0x3f39e132, 0x3e91fa0a, 0x3db1a4f0, 0x3e319080, 0x3ea825ec,
0x3ef927ba, 0x3f43c3c1, 0x3de54748, 0x3c643940, 0x3e873a08, 0x3f040ad3,
0x3f761a55, 0x3f5076ef, 0x3f27e6a1, 0x3c864a80, 0x3f11ab22, 0x3ee89a44,
0x3f7f5c64, 0x3f2fe4e3, 0x3f2cf469, 0x3f1dc003, 0x3f08b381, 0x3c9bccc0,
0x3f3397ba, 0x3df9d740, 0x3e227020, 0x3f67225d, 0x3d43b5c0, 0x3b13a000,
0x3e850468, 0x3eba37b4, 0x3f68d6ed, 0x3eaff3d4, 0x3f4f2667, 0x3efd5d96,
0x3d286520, 0x3e031f88, 0x3f7f9ed7, 0x3f18f491, 0x3f62acfc, 0x3ddaf220,
0x3e89721c, 0x3e5d693c, 0x3f417389, 0x3f71cf35, 0x3ecfd50e, 0x3c1c3a80,
0x3f0af1a6, 0x3ec114c6, 0x3e0b9774, 0x3ede32d4, 0x3f4e661e, 0x3f687d66,
0x3f3d77ac, 0x3f6f0343, 0x3f641d5d, 0x3f63e789, 0x3ea55fac, 0x3cd34ac0,
0x3f72a8f4, 0x3f160c23, 0x3ea2d36c, 0x3eb17e60, 0x3f2b9905, 0x3f67672c,
0x3ed67d9e, 0x3f290f1b, 0x3e28541c, 0x3f657fd6, 0x3f0b24da, 0x3ef8e85c,
0x3f1f0187, 0x3f04ea25, 0x3f35ce89, 0x3f66cd7c, 0x3f477cad, 0x3d8efe60,
0x3f7778f2, 0x3ceb0ec0, 0x3f3e30db, 0x3eb27386, 0x3f331d5d, 0x3f345742,
0x3eb82282, 0x3e109c08, 0x3ed02474, 0x3f3ecea4, 0x3f59f84d, 0x3ee0357a,
0x3e7222f0, 0x3ecca506, 0x3f02d0a6, 0x3eef0a28, 0x3f4310eb, 0x3f026842,
0x3dc64630, 0x3f0d205c, 0x3ed7565c, 0x3e9f210a, 0x3e1527f4, 0x3edc3884,
0x3f1dbe71, 0x3ed243ba, 0x3f1268ea, 0x3eabb42e, 0x3d1abe50, 0x3f5c3655,
0x3f490120, 0x3ea62188, 0x3f6732ed, 0x3e6cca60, 0x3f7118d7, 0x3ede6e68,
0x3ed7550e, 0x3f3b981c, 0x3e3a1adc, 0x3f1265a5, 0x3d954db8, 0x3efae76c,
0x3f6f78f9, 0x3f5f59bb, 0x3f368f94, 0x3ea61b26, 0x3e5a004c, 0x3f00fb35,
0x3d216a30, 0x3ebd061a, 0x3f3217ec, 0x3e1c9fc8, 0x3e7aee48, 0x3f7b5503,
0x3f4c506d, 0x3f7ccbec, 0x3f69ab33, 0x3f54b76f, 0x3f2ffe47, 0x3e9b4d50,
0x3ed17370, 0x3f6b8ba6, 0x3f096fbb, 0x3e25fe6c, 0x3db3ed90, 0x3d8470b0,
0x3ef83fea, 0x3f158e41, 0x3f6a0bf2, 0x3f3c0de9, 0x3e070158, 0x3f6ecaf7,
};
// 10,8,5,5
uint32_t kernel_vals[] = {
0x3d3114e7, 0x3d4407ad, 0xbd35d912, 0x3ca7c94a, 0xbc56a7e8, 0x3b948e90,
0xbccbb9a0, 0x3c2b9b28, 0x3ca02e4e, 0xbcceb383, 0x3c6a04fc, 0xbd37c660,
0xb799d800, 0xbc5c8848, 0xbc4ae274, 0xbcf0a620, 0x3cb33d9e, 0x3d261659,
0x3cc7aeb6, 0x3d326067, 0x3c9c9e26, 0xbbcc0050, 0x3cd0ac2a, 0xbc893ff4,
0x3b8b1050, 0xba428000, 0xbd315ffa, 0xbd0f4ef5, 0x3bbcf490, 0xbc2ab878,
0x3bc68180, 0xbbc9bb68, 0x3cd18a86, 0x3c96670e, 0x3c22f178, 0xbca5d14a,
0xbca34e20, 0x3c69da2c, 0x3c012fc8, 0xbc4e8c78, 0x3c6c85a4, 0xbc8a1926,
0xbc54d694, 0xbd031dd0, 0xbc5f05c0, 0xbbdf5d98, 0x3cfff456, 0xbc9b11c7,
0xbd0435ce, 0xbd0479da, 0xbb11a930, 0xbd09e01a, 0xbcae6513, 0x3c897392,
0xbd33a047, 0xbc90b650, 0xbbfc8990, 0x3c8228ee, 0xbca793ea, 0xbd149155,
0xba0b0b40, 0x3cf9af0e, 0xbd20aafd, 0x3b9c4c68, 0xbd08876d, 0x3c3bf5c0,
0xbc85b67a, 0x3c955286, 0x3c4ab648, 0xbca8e4b7, 0x3c4cdf44, 0xbccb04c3,
0x3c22b794, 0xbd0e93a0, 0x3d2b04dd, 0xbc6033f4, 0xbccbc0f7, 0xbd0e3688,
0xbc4bfcd8, 0xbd37700a, 0xbd4b06a7, 0x3c0ceeec, 0xbbdb7928, 0x3c47f720,
0x3d3832a9, 0x3bd083d8, 0xbd420c63, 0xbd20b7cd, 0x3d284029, 0xbd2f3a1d,
0x3cdc94ea, 0x3cc68052, 0xbc0ab8e0, 0x394d6e00, 0xbd1fc3aa, 0x3c4e2404,
0x3d0adb4d, 0x3c6f5e74, 0x3d373d99, 0xbcd89817, 0xbc582354, 0xbb25eea0,
0xbd33a903, 0xbcc14be7, 0x3b5d7630, 0xbc550a98, 0xbd280dfd, 0xbd412b6f,
0xbcda4e57, 0xbb931290, 0xbcd13840, 0xbd378128, 0xbb4bacb0, 0xbc816b44,
0x3cc4982e, 0xbbf372f0, 0xbc1ece18, 0xbc8989d0, 0x3d2dbdf9, 0xbd2d3ab0,
0x3d4754e3, 0x3c4187f8, 0xbcbd2fdd, 0x3c945352, 0x3d080845, 0x3b240150,
0x3c131a98, 0x3b7fc8a0, 0x3d282079, 0x3c047518, 0x3c9ccfca, 0x3d252367,
0x3d14eb05, 0x3d2ee1b1, 0xbc832ce6, 0xbb9290b0, 0x3ced2af6, 0xbbcd5880,
0xbd237b88, 0xbc38d38c, 0x3cd2775a, 0x3c209b68, 0xbcc059b3, 0xbc2d7688,
0x3c3664a8, 0xbd444938, 0x3bb62998, 0x3cfce4ea, 0xbd2647d2, 0x3c4f8f54,
0xbcc7f663, 0xbc706940, 0x3cf03666, 0x3c894e02, 0x3cdd4b22, 0x3d3058e5,
0xbd178a16, 0xbd33a122, 0xbcaf84fa, 0x3d2b357f, 0xbbcc8510, 0xbcf1e24d,
0x3d1811bb, 0x3d07983b, 0x3d00c77d, 0xbd367605, 0xbd4672e3, 0x3d0419c7,
0x39b31800, 0xbd492abb, 0xbc9b6eea, 0x3be18d70, 0xbd41a34a, 0xbcfcf530,
0x3cfcab42, 0xbd3e81a2, 0xbd421e7f, 0xbcc11efd, 0xbca63d6d, 0xbd331545,
0xbd38f0bd, 0x3d496ed7, 0xbc17b734, 0x3c3b45f4, 0x3c64196c, 0xbd417f67,
0x3d15ae6f, 0x3d14b5f5, 0x3c64e8bc, 0x3b57aae0, 0x3c5c3774, 0xbcca7973,
0xbcded7b3, 0xbcb2267d, 0x3ca850b6, 0xbd09ca34, 0xbcfc9c53, 0xbc99dc4d,
0xbd2dda8b, 0xbd104bc0, 0xbcd2fcc7, 0xbbbd1f80, 0x3ba3d618, 0x3b924eb0,
0x3c0f8a6c, 0x3cc38ea2, 0xbca04520, 0x3b4b43d0, 0xbc6d4e08, 0x3c1c136c,
0x3d0ad6ab, 0x3c7f40fc, 0x3d0add39, 0x3d06e91b, 0xb8853000, 0x3d46d18b,
0x3c98251a, 0xbc107654, 0xbc49e4ec, 0xbc4a6e8c, 0xbcc6af4d, 0x3d181b39,
0xbcf100ed, 0x3bed0c00, 0xbacbcf40, 0xbc2304c0, 0x3d1b6291, 0xba2194c0,
0xbc3212ec, 0xbbecaeb0, 0xbd425452, 0xbcb6dac3, 0xbc86e604, 0x3cccd70a,
0xbcc3d7aa, 0xbba5a570, 0x3c4da1fc, 0xbcbb9c3d, 0xbcf26c8d, 0xbd38e4c7,
0xbd4ab0b3, 0xbb218ae0, 0x3cce9f6e, 0x3c6a84a4, 0x3c8fbf5a, 0x3c20d718,
0x3cd7200a, 0xbcf3275d, 0xbca6530a, 0x3cd43cfe, 0x3d1aa751, 0x3d1daee3,
0x3cbf75f2, 0xbb8c1c50, 0x3cf04506, 0xbd43d9c2, 0xbbe133c0, 0xbc95d02a,
0x3a580cc0, 0x3d433091, 0xbd310a97, 0x3d22b219, 0xbd20c68d, 0xbcf093a3,
0x3a90b0c0, 0xbcd4a277, 0xbcc4ea5d, 0x3ba52110, 0xbd4584b0, 0xbc4892e0,
0x3cf9cef2, 0xbd202d7b, 0xbcf8329d, 0xb9317b00, 0xbb02cb60, 0x3d16a987,
0x3ccd0ae2, 0xbd0e07bb, 0x3ce5afe2, 0xbcba3e53, 0xbd004140, 0x3c727284,
0xbd3100aa, 0x3ce1384a, 0xbc7980ac, 0x3d220849, 0xbd3db48b, 0xbd401a28,
0xbca574ea, 0xbc3922f4, 0x3d031b4f, 0xbd32a3f0, 0xbd2c5190, 0x3d1b5ce1,
0x3c8da5b2, 0xbd1adf65, 0xbd3eaf7f, 0xbd40fb2d, 0xbc019894, 0xba3c1140,
0xbcf569ad, 0x3bede0a8, 0x3b1b9230, 0xbd23010b, 0x3c740fcc, 0xbbd867c0,
0xbc17c908, 0x3b348ca0, 0xbc5dd360, 0x3d2a569d, 0xbcdc6527, 0x3d15f95b,
0x3c943d1a, 0x3b68f8d0, 0xbce9bb5a, 0xbc0014b4, 0x3d0229a5, 0xbd4ba5e0,
0x3d13459b, 0xbab304c0, 0x3d053451, 0xbc52e2cc, 0x3c0c96a8, 0xbd334520,
0x3cc7999a, 0xbafba400, 0x3c4b8ce8, 0x3d3f28c9, 0x3d3959cd, 0x3ca50e6e,
0x3c64cc2c, 0xbd4c667b, 0xbbba0840, 0xbcf05baa, 0xbb70df60, 0x3c910432,
0x3c84d512, 0xbd388aaa, 0x3c8acbf6, 0xbc3d9808, 0xbcda55a7, 0xbc24b518,
0xbcc722f0, 0x3cad76be, 0x3c70c6dc, 0x3d2b11e3, 0x3d080f31, 0xbc220d2c,
0xbd3703ba, 0xbd191162, 0xbc6c6f40, 0xbd1de1dd, 0x3d1235e5, 0x3d09d783,
0x3ccdc1ee, 0xbd1bc0b0, 0x3d100d91, 0x3d328b8f, 0x3c9d09ae, 0x3ccd7882,
0x3d4b1a4d, 0xbd093d0c, 0xbd4c717f, 0xbceb60ea, 0x3b2b4ea0, 0x3cf9e1ea,
0xbd493907, 0x3d3ce3f1, 0x3d195011, 0xbca6a497, 0xbcc9e50d, 0xbcc9a8b7,
0xbd2c719d, 0xbd1ed948, 0xbc243d94, 0xbcdb1f83, 0x3ca5dcfe, 0xbd4afb10,
0x38343400, 0xbc8c7d06, 0x3d1dc93f, 0x3d4ada1d, 0xbc86d956, 0xbce683e3,
0x3d0fffe1, 0x3b17b100, 0x3c475238, 0xbccf00f3, 0xbb9a41d0, 0xbd1a502d,
0x3b5ba7d0, 0x3d45967d, 0xbd119e3b, 0xbc7f0188, 0xbd0cdef0, 0x3c0efb68,
0x3d3dd0f3, 0xb7ac8000, 0xbcab8b77, 0x3cba91c6, 0xbc100de0, 0xbd4bd305,
0xbbf6a4d8, 0xbca78a53, 0x3c83d052, 0x3d393393, 0x3ccea7ae, 0x3d1e4b01,
0xbd2825a6, 0xbd18795e, 0x3c6bafd4, 0xbc644f88, 0xbd2ce9d7, 0xbc0d95d4,
0x3c083834, 0x3b0057b0, 0x3cc75282, 0x3ce1beba, 0x3c3a97ec, 0x3bd0a898,
0xbcd2478a, 0xbccdefdd, 0xbc0876a8, 0x3bfed400, 0x3cc8e346, 0xbc8e1f0a,
0xbca92707, 0x39f45d00, 0x3c270728, 0xbc208c78, 0x3b499c00, 0x3d4866f5,
0x3b1b1fb0, 0x3c9e40d2, 0xbd087ff6, 0x3ca2bef2, 0xbca468d3, 0xbca16b1a,
0x3d3addf5, 0x3d0e80bf, 0xbc78d1ac, 0xbcf4ff6d, 0x3d12995b, 0x3b26b4d0,
0xbd02b830, 0x3c2f7634, 0xbd38ff10, 0x3ca8f88e, 0xbcc0a01a, 0x3d3e36f3,
0x3ce4f236, 0xbc57488c, 0xbc873f94, 0xbd078f10, 0x3c5c97fc, 0x3d26b433,
0x3c5f45f4, 0xbcb806a7, 0xbcf658aa, 0xbd4a8470, 0x3d1ac939, 0xbbb171c0,
0xbd00ee5e, 0xbc93b7e4, 0x3c21d4a8, 0x3d1a4def, 0xbd15782e, 0xbca9c733,
0xbd0d9e3b, 0xbcfdea43, 0xbcbde660, 0x3cb42d8e, 0xbd206ac0, 0xbae99a00,
0xbc220d0c, 0xbccb22e0, 0x3d166429, 0xbd068cfd, 0x3d05072b, 0xbcfbdd43,
0xbcb96ea7, 0xbb806270, 0xbc42d22c, 0xbc99f550, 0x3d13b6ef, 0xbc7b5968,
0xbcc11cb0, 0xbcd22397, 0x3d467733, 0x3d437e0f, 0x3ce33436, 0x3d45e69f,
0xbcb4e1d3, 0xbc9d780d, 0xbd44eddb, 0xbc9f8fca, 0xbcf78a10, 0xbc667634,
0xbbc440b0, 0x3c4219ac, 0x3bfc1290, 0xbabf0aa0, 0xbd0e8156, 0xbcd89f10,
0xbd22bc6a, 0xbca2091d, 0xbd231f4b, 0xbbb9ed70, 0xbc4c8ce8, 0x3d302005,
0xbce67d5d, 0x3d3315ab, 0x3d42b557, 0xbcfb3853, 0x3cbf22fa, 0x3c12c0b8,
0x395ae800, 0xbd13572e, 0xbc916986, 0xbc828f20, 0xbd0918b5, 0xbc012328,
0x3c289e98, 0x3d3b4c3b, 0xbcc988c0, 0xbce724a7, 0xbcba939d, 0x3d081539,
0x3c1c8748, 0xbd27860b, 0xbbd36d68, 0xbd32ff08, 0x3a07c480, 0x3b68ad60,
0xbc95b244, 0xbb803750, 0x3d304595, 0xbc1a6028, 0xbca8c7c3, 0xbd2183eb,
0x3bfa09e8, 0xbcf657b7, 0x3bff8f70, 0xbc4a8ccc, 0xbd08d850, 0xbd2ac862,
0xbc7f8300, 0x3cad9fc2, 0xbcbab96d, 0xbc097d78, 0xbc7fad2c, 0x3c0f1f14,
0xbc849b46, 0xbd497d13, 0xbd00be2c, 0x3bb30530, 0xbd0d0112, 0xbc06f720,
0xbc8ddc4c, 0xbcc89d13, 0x3d202a01, 0xbbaec7d8, 0x3d29e3b7, 0xbd1a09f5,
0xbca13973, 0x3cd3cd26, 0x3cebb3f6, 0xbbe50af0, 0xbd35d98f, 0x3d1f7d17,
0x3d236eef, 0xbb822f98, 0x3b77e3b0, 0x3d406aa1, 0xbccda04d, 0x3d213933,
0xbd29efdd, 0xbb52e030, 0x3cc425a6, 0xbcad5aa3, 0xbd0edd9d, 0xbc4fd994,
0x3c731dd4, 0xbc936a74, 0x3c092048, 0x3b8cdf68, 0xbd359ca3, 0x3a916860,
0x3d16e051, 0xbc452278, 0x3cff2f52, 0xbc2aa378, 0x3b1f33e0, 0xbd1008a5,
0x3d1396bd, 0xbbcee730, 0xbd32750b, 0x3c5e0074, 0xbd1d38e3, 0x3d17c565,
0xbcc91663, 0xbc58e3a8, 0x3c7060e4, 0x3d0aa399, 0x3bf3e110, 0xbd23fdb5,
0x3cee8352, 0x3d28a7f7, 0xbc4de580, 0x3cdc852a, 0x3d0e4c21, 0xbb4875b0,
0xbbd2b018, 0xbd0cd62a, 0x3c750ec4, 0xbca804bd, 0x3b607880, 0x3cb1ab1a,
0xbb9b9640, 0x3c425e0c, 0xbab5cfa0, 0xbd3bd7c3, 0x3d4cb99d, 0xbd2adf2d,
0x399faf00, 0xbb450930, 0x3c62e114, 0xbcfb6890, 0x3d19b807, 0xbc333088,
0x3ca1ce42, 0xbca8fe90, 0x3c00c3c8, 0x3d0f85ad, 0x3c4a3528, 0xb9c2e680,
0x3b587fe0, 0xbc6fd8e0, 0x3ca98c0a, 0xbabaeb40, 0xbbb38168, 0xbcd55fda,
0x3cabf766, 0xbbf93d10, 0x3d2666ab, 0xbccbd870, 0x3cb013da, 0xbc8de3f0,
0x3c853306, 0x3ca6a16e, 0x3d439811, 0xbb590460, 0x3b920898, 0x3b85bc10,
0xbce92ce7, 0x3c6c3284, 0xbafe8960, 0x3c945cc2, 0x3c754a7c, 0xbc2abab8,
0x3c7b58dc, 0x3d08e483, 0xbd126588, 0xbc968340, 0x3d24cd49, 0x3cb3d2da,
0xbd2d76eb, 0xbc813a44, 0xbd39e80d, 0x3cc53a6a, 0x3d0ebf09, 0xbbb9a7f0,
0x3d0b9495, 0xbcee629d, 0x3ce14c82, 0x3c8c3152, 0xbbac1070, 0x3cf3a29e,
0x3cf1d7da, 0x39dc3700, 0x3d485977, 0xba38fb80, 0x3cfcefb2, 0xbcc5326d,
0xbd0244a4, 0x3ae3e240, 0x3ad2db40, 0xbd248bd0, 0x3d4c15c9, 0x3bbe53a8,
0xbcc67bc0, 0xbd080328, 0x3b610de0, 0x3c2f094c, 0xbd40ed1d, 0xbcea71b3,
0xbcf7154d, 0x3d30698f, 0x3cd21802, 0x3c18a814, 0xbcd07c67, 0x3cfa565e,
0xbcef7d00, 0x3c8ba85e, 0xbc8159b0, 0xbca6ffcd, 0xbd05df9a, 0x3c309480,
0xbd0d905e, 0x3d2f28ab, 0x3ab1e760, 0x3c6e6cc4, 0x3d0dced9, 0x3be71b70,
0xbd01b3b6, 0x3d3f7f8b, 0xbbb3e6b0, 0x3c429918, 0x3cdf0662, 0xbba3ee28,
0xbca5aaed, 0xbaa6f360, 0xbd352b5f, 0xbce29c30, 0x3bae5b50, 0xbcf5ecd3,
0xbd1b9263, 0x3c6e55fc, 0x3d095799, 0x3cfc7d6a, 0x3c90a572, 0xbab16840,
0x3cbcd04a, 0x3a97d940, 0xbd04a19c, 0xbd42e445, 0x3c595cd4, 0xbc7c71c0,
0xbd31da0d, 0xbc962a74, 0xbd0c49b0, 0xbd1443b5, 0x3a8b8060, 0x3d2a8f6d,
0xbc04f974, 0xbd1fdeb0, 0xbd3aed78, 0x3c4628e8, 0x3d2145d5, 0xbb6fd580,
0xbc8fa2da, 0xbcced14a, 0xbadfd860, 0x3ce723f6, 0xbd28aca5, 0xbca54a13,
0x3d45bed1, 0x3cd6db22, 0x3c8338ba, 0xbd45e5e7, 0xbd330b0d, 0xbce8685d,
0xbd47ad03, 0x3c0cfcc0, 0xbd2a62ba, 0x3cbd023a, 0x3d49da49, 0x3c23ee28,
0x3d2c5c47, 0xbcf8b1b0, 0xbd2c365b, 0x3c59734c, 0x3ce80486, 0x3d464e63,
0xbd2d7b1f, 0xbc804414, 0x3d463d95, 0x3ce1367a, 0xbd332f6f, 0xbc972fda,
0x3cca32e6, 0x3d23aff5, 0x3d3fb20d, 0xba892400, 0xbca38ac3, 0x3b883350,
0xbcfe11c7, 0x3d3bf377, 0x3bc73210, 0xbc61e0ac, 0xbd131c43, 0x3a0ddc80,
0xbca5ecbd, 0xbd0f1b78, 0x3c69512c, 0x3d35d1f1, 0x3cc28532, 0xbbff91c0,
0x3b51c780, 0x3c03fcc8, 0x3cb255a2, 0x3c230300, 0x3d0815e7, 0x3bacb8c0,
0xbd039c7a, 0xbb3584d0, 0x3d1bfac9, 0xbd3ae958, 0x3cefc6a2, 0x3c235ae8,
0x3ccab992, 0xbd370b4b, 0x3a732200, 0xbd461592, 0x3cc961f6, 0x3c838242,
0xbc9cced3, 0x3d27de81, 0xbc8344fc, 0xbc7faee8, 0xbd1e254b, 0x3d469e51,
0x3ce20ebe, 0x3c2f144c, 0xbc357d2c, 0xbc3620e8, 0xbc04a334, 0x3c5956a4,
0xbc8ba3c4, 0x3bca29e8, 0x3d17d1e3, 0xbba196e8, 0x3c8c295e, 0x3d2c4267,
0x3c983e9e, 0x3d09932f, 0xb9cddb00, 0xbd090ac2, 0x3c2467e0, 0x39fd2400,
0x3d0f0b43, 0x3ca1e1d6, 0xbba80d18, 0xbcc25020, 0xbcc3dcb0, 0xbbe231e8,
0xbd26d855, 0x3adee9c0, 0x3d3ef06f, 0xbd2c23e5, 0x3d2cba01, 0x3cd42aca,
0x3ac605a0, 0xbcc3951a, 0x3b32c4b0, 0x3ce38f9a, 0x3a6874c0, 0xbb147a00,
0x3c7019a4, 0x3c9e6102, 0x3b0e2d80, 0x3c7dbafc, 0xbd20fbd8, 0x3d436619,
0xbd434c55, 0x3bc58228, 0xbd3591bd, 0xbbd1a028, 0x3c163ff8, 0xba18cb80,
0xbc6d2034, 0xbbc6aaf0, 0x3d1be929, 0x3cf2d14e, 0x3d3ecf11, 0xbce0bd70,
0x3cf668b2, 0xbd304c52, 0x3d0f5a29, 0xbb3c8050, 0x3d2a76fd, 0x3cdfec42,
0xbc131ed4, 0x3c8715da, 0xbced47e0, 0x3caca7c2, 0xbb68ff00, 0xbd2bfced,
0x3c6bbf0c, 0xbd313687, 0xbba436a8, 0xbcd181d7, 0xbd37cf83, 0x3c5b8504,
0xbd082a58, 0x3c96080e, 0x3cde49b2, 0x3a8d1bc0, 0xbd32c9b7, 0xbbaeaad0,
0xbc80155c, 0xbc08e3a8, 0x3ca31582, 0xbbea7eb0, 0x3d4b33a9, 0x3cd27dda,
0xbc883e6c, 0xbc9deb03, 0x3ceda292, 0xbc9d334a, 0x3cab4f56, 0x3d46cadd,
0xbd339477, 0xb98b6900, 0x3c947fb6, 0x3d023c31, 0x3c99d8a2, 0xbd1473f8,
0x3c3642c8, 0x3d2980c5, 0x3c5b1c54, 0x3d3bb0f1, 0xbd031e18, 0xbad1c9a0,
0xbccc6d0a, 0x3c952096, 0xbcaa9d87, 0x3cf9b81e, 0x3bfe83a8, 0xbc9c417a,
0x3af637c0, 0xbca5ffc3, 0x3cf64072, 0xbc8c5214, 0xbcb6240d, 0xbd30cb48,
0xbc1c45cc, 0x3d3953f1, 0xbc29d26c, 0xbd33c0e5, 0xbd130e08, 0xbd2e02cb,
0x3acbdc60, 0x3cef5bae, 0x3d0197ed, 0xbd1cff72, 0xbd11b5a0, 0x3d1b8873,
0xbd38de4d, 0xbd476057, 0x3d239081, 0xbc05e78c, 0xbc94c6f0, 0x3d00f2b7,
0xbbeb7c68, 0x3d307db1, 0x3d2f397f, 0x3d3b5935, 0x3c114f98, 0xbcc65a4a,
0xbd34016d, 0xbd05a335, 0x3d0d3551, 0x3c59b1c4, 0xbd235a40, 0xbd0a2bea,
0x3ccc2556, 0xbbfd6258, 0x3cd81886, 0x3d41dcc5, 0x3d37ecf7, 0x3cae1086,
0x3c73a234, 0x3d1c71a9, 0xbd3ca15d, 0x3d43e907, 0x3c94baae, 0xbd4b5aca,
0x3d09daff, 0x3c53a574, 0xbcf09773, 0x3b3b13b0, 0xbd27229d, 0x3d2593df,
0xbd2c7f62, 0xbd1eca76, 0x3c0888c8, 0x3b860140, 0xbcb67bb0, 0xbcf435aa,
0xbd2e8ce2, 0x3b89b750, 0xbccdf04a, 0xbcdbd9fd, 0xbc1118c0, 0xbd4c0207,
0x3ca91bf2, 0x3d2e3cd1, 0xbc160cac, 0x3c9bfa22, 0x3c031e94, 0xbbd129b0,
0x3d25f675, 0x3cda9792, 0x3d2aedb3, 0x3d412a1f, 0xbd0a4846, 0x3cdd4c76,
0xbcc4248a, 0x3c27b0a0, 0x3a615940, 0xbc66b220, 0xbd2e8bb8, 0x3d49ae11,
0x3d4332d9, 0xbcfc2100, 0xbd2ac383, 0x3cd667c6, 0x3d0c976d, 0x3c85c5fa,
0x3ba20c28, 0x3cf6ef96, 0x3c4b5c68, 0xb9b6ba80, 0xbcbafbf7, 0x3b0a1ee0,
0x3cee6332, 0xbc404a0c, 0xbc0f05f8, 0x3d1b3bcb, 0x3d4820bf, 0x3d2c90c9,
0x3d0d3843, 0x3b7f07d0, 0xbc6e3cd4, 0xbd017f98, 0xbbe09b70, 0xbc564360,
0x3d310a81, 0xbc68efa0, 0x3aaa1800, 0xbd4b4008, 0xbb92add0, 0x3d0a26d3,
0xbb03ccb0, 0xbb88e0d8, 0xbd0d3143, 0x3cd98022, 0xbcfba76a, 0xbcb0efaa,
0xbcb783ed, 0xbd2702ea, 0x3c23e634, 0xbd368ec2, 0x3bbb2b18, 0x3d43a38b,
0x3c07f7f4, 0x3c0f2cc0, 0xbca0230a, 0xbd451f0a, 0xbc8313cc, 0x3d4670e1,
0xbd406357, 0x3cbf59fe, 0xbca8e0ed, 0xbcb9bb3d, 0x3c817452, 0x3c900d2e,
0x3bd8d158, 0xbd2977c3, 0xbc3dd788, 0x3d12260f, 0x3cff63ea, 0xbcdeb8c3,
0xbced00da, 0x3ce76e82, 0xbcc8f677, 0xbc6648b4, 0xbd449ada, 0xbc9af66d,
0xbcbf552d, 0x3cdb28da, 0x3a1a6680, 0xbd1d79c0, 0xbcef2c2a, 0xbbf520b0,
0xbabc0a00, 0x3c8d280a, 0xbc989136, 0xbd0a489a, 0x3c368168, 0x3cc19ade,
0x3d2c7f03, 0xbd322e52, 0x3cb94f62, 0x3d0b907d, 0xbcb2682a, 0x3c09f140,
0x3bd4a1e8, 0x3d2550e5, 0xbced6c9d, 0x3d1c208f, 0x3d029b61, 0x3c80bfd6,
0x3c868faa, 0xbcd907aa, 0xbd31def2, 0x3d1d9951, 0x3cd8f40a, 0xbcf5fbd0,
0x3c9fcf6e, 0x3d32e6bf, 0xbc598380, 0xbd404c47, 0x3d030313, 0x3add26a0,
0xbc23c368, 0xbcbc4ff7, 0xbcfb37d7, 0xbd0f0d1a, 0x3d2cea83, 0xbcfc20f7,
0xbc3e6fa0, 0x3d28f981, 0xbc44ed28, 0xbc5752c0, 0x3bd6f0a8, 0x3d47bcb9,
0xba1b8b80, 0x3d00db71, 0x3b4f5150, 0x3c180534, 0x3ac24e00, 0x3d23a575,
0xbcb0afaa, 0x3c3df058, 0x3bdacd10, 0xbc2f4de8, 0xbcebcbad, 0xbc044674,
0x3d2a7241, 0xbd351873, 0xbcc99800, 0x3c644aa4, 0xbc93dba0, 0x3bd56c70,
0x3c22a874, 0x3c29316c, 0xbccde2fa, 0x3d04bf69, 0xbd2b2bd2, 0x3c24f6b4,
0x3d006067, 0xbd016525, 0xba8bdcc0, 0x3c7f18dc, 0x3cfa8832, 0xbc4c5414,
0xbcdd47ca, 0xbcfd17f3, 0x3d3dfcef, 0xbc986150, 0xbc7f99f8, 0x3d47203f,
0x3c1df868, 0xbcb19b1d, 0xbcec124d, 0xbc249dac, 0x3c8d9db2, 0xbcb76dc7,
0xbc90ab9a, 0x3d2d7e8b, 0xbd0ecbfb, 0x3b9ad180, 0x3d229639, 0xbd44e212,
0x3c86b72e, 0xbc825a46, 0x3cb2e2c2, 0x3ce0e25a, 0x3ccd776a, 0xbbec5d28,
0xbb71f950, 0x3c998342, 0xbc0e10a8, 0x3d38ba4b, 0x3d1626a9, 0x3cc00aa2,
0xbd3bfb45, 0x3c43b2d8, 0xbc601b14, 0x3bae2280, 0xbb8abdd0, 0x3d3ef73d,
0xbd47cbeb, 0x3d18422b, 0xbd079f7c, 0x3adfe460, 0x3d3962e7, 0xbd1ec823,
0x3ce4f25a, 0xbc419248, 0x3d0f8593, 0x3d39e519, 0x3d279cd7, 0x3ca695e2,
0xbce8d18a, 0x3c8369fe, 0x3c7b33dc, 0x3c92c912, 0xbd02a74e, 0xbce951ea,
0x3cddb652, 0xbd438bb0, 0x3c670944, 0x3d077419, 0x3aee6d40, 0xbcccddda,
0x3cd07792, 0x3aeb1140, 0x3d00ab6d, 0x3cdab052, 0xbc83a6c0, 0x3d378b65,
0x3d18ca3f, 0x3b5e9dd0, 0xbca3cd5d, 0x3cc7db5e, 0x3cece702, 0xbcdb7367,
0x3d2e6291, 0x3d23da33, 0x3c4d13ec, 0x3c9fef32, 0xbd25bf5b, 0xb9252b00,
0x3ad8d6a0, 0x3c337420, 0xbb6c2bd0, 0x3d031713, 0xbc4f236c, 0x3c51b244,
0xbc44ad4c, 0x3c9474f2, 0x3c063458, 0x3c13228c, 0xbbbba390, 0x3b4f2c60,
0xbc20288c, 0xbc1c6ec0, 0x3d2342e9, 0x3c6b03fc, 0x3b9fd890, 0xbbe72070,
0xbc351b0c, 0xbc4d3e14, 0x3cbe837a, 0xbb30cb30, 0x3ce17856, 0xbb8c5a58,
0x3c074738, 0x3c382288, 0xbbcd2b28, 0x3c82507a, 0x3b2a0b60, 0x3d44130f,
0x3c10d9e0, 0xbabce6a0, 0xbadf8600, 0xbb739c80, 0xbc600f80, 0x3c82c276,
0xbd2226f5, 0x3d3ff37f, 0x3d4426ad, 0x3d22f737, 0xbc591d08, 0xbd24f663,
0x3bdd6390, 0xbd386275, 0xbc866100, 0x3c695014, 0x3c814c0a, 0x3d3f3311,
0xbc6bc1e0, 0x3d32ca43, 0x3cb7d7ae, 0xbba1e9d0, 0xbd4bd5fa, 0x3ba978d8,
0xbca2af5a, 0xbb2bc200, 0x3cb7bb0a, 0x3d0ba59f, 0x3d169ef1, 0x3b0a2650,
0x3d1fc229, 0x3cfa4662, 0x3c9529de, 0x3cd13772, 0x3cd6f05e, 0xbca93473,
0xbcafe123, 0xbd02a278, 0x3c3c0cd4, 0x3c894c4a, 0x3c41bd00, 0x3c5ca0a4,
0x3d1b717b, 0xbcd16950, 0xbc7de328, 0xbd3cf5ef, 0xba650800, 0xbd3e2408,
0xbb54cbb0, 0xba0f8cc0, 0x3cd82822, 0x3d3d792f, 0xbc9516b4, 0x3b1d1d50,
0x3d368979, 0x3c5e6dec, 0xbd3cf378, 0x3d3a8635, 0xbd4662e0, 0x3ca3eb6a,
0x3bd87628, 0xbd4aa05b, 0x3cc1540a, 0x3d11f57f, 0x3c6448c4, 0x3a90a600,
0xbd25e66b, 0xbd3333bf, 0xbc35e6e8, 0xbca0f943, 0x3b20bee0, 0xbd1881d8,
0x398e8580, 0xbd1f24b5, 0xbc42176c, 0x3d46a8a7, 0x3d17a7fd, 0x3ca6c69a,
0xbc153748, 0x3bb1acf0, 0xbd2a041b, 0x3caf685e, 0x3ac27160, 0x3c1830a0,
0xbc5498b8, 0x3c462634, 0x3d08fa25, 0xbd1eb5a2, 0xbc1f14d4, 0xbceeee57,
0x3ceae45a, 0x3c4c2028, 0xbca0930a, 0xbcad99ed, 0xbd01bb5a, 0xbc541b68,
0x3d47d671, 0xbc8964d0, 0xbc3b78f8, 0x3cbf18fa, 0x3d2a8f6d, 0x3bad6668,
0xbcc31657, 0xbcdf69d7, 0xbc216f8c, 0xbc76a434, 0x3d06df89, 0xbd2d9123,
0x3c8ffc22, 0x3cd98b1a, 0xbb93ef10, 0x3d4a7163, 0x3d0d6471, 0x3c02b808,
0x3b9e7940, 0xbc331560, 0x3cfa9c82, 0x3cd98a2a, 0x3ad2af00, 0x3d16e8bf,
0x3d04c911, 0xbcb0a740, 0x3d0eae19, 0x3d42eb55, 0x3c9cf206, 0x3d3a18c9,
0xbb4e7e50, 0xb9f4ad00, 0xbcf3437a, 0xbd2d651f, 0x3c2297ac, 0xbd3bb2c8,
0xbc5efd4c, 0xbc949774, 0x3cc4f6a2, 0xbd0a815a, 0x3cee9902, 0xbcbb15a0,
0x3c82e192, 0xbd1b7e8e, 0xbcf11be0, 0x3bbbe510, 0xbce9d433, 0xbd13d5bb,
0xbc6815ec, 0x3c89ceb2, 0x3cee4ede, 0x3c6b3384, 0xbd112576, 0xbcda1fa3,
0xbc8a3dca, 0x3c51d724, 0x3cf2124a, 0xbbe8eeb0, 0xbcdb7f5d, 0xbd2cc46e,
0x3d3909f3, 0x3c75b3fc, 0x3d1b4d4f, 0x3c8dcb66, 0xbbf7bad0, 0x3c82e00a,
0xbca273e7, 0xba8bc8a0, 0xbc7053f8, 0x3c9c67ae, 0xbb958c40, 0x3c20db00,
0x3c1b5a28, 0xbc9967d0, 0x3ca42a9e, 0xbce59ef3, 0xbd31c562, 0xbd01404c,
0x3d06f385, 0xbc8bcd74, 0xbb05c3b0, 0x3cbbf1f6, 0xbcf06560, 0x3d13e9e9,
0x3c083118, 0xbd183ebb, 0x3cda6dd6, 0xbd29999b, 0xbabd2ea0, 0xbce821b0,
0x3c419c60, 0xbd2b8af8, 0x3d1f3849, 0xbca0c1ca, 0x3c5a8f1c, 0x3d1ce21b,
0xbcaf98e0, 0x3d3c0893, 0x3d0a853f, 0x3cf646aa, 0x39affb00, 0xbd389690,
0xbd4b39d3, 0xbb503720, 0xbbb53590, 0xbbd704b0, 0xbc37d514, 0xbd0719dd,
0xbae6c6a0, 0xbcdbf147, 0xbc20dd08, 0xbd4c05fd, 0xbc81f7f0, 0x3bf4ba30,
0x3cd79452, 0x3d452637, 0xbc461978, 0x3beec000, 0x3d338637, 0x3c9bf462,
0xbd32ee0f, 0x3c22b3a0, 0x3d29b317, 0x3d3c7313, 0xbc376740, 0x3c8c37a2,
0x3d0ca591, 0x3b46b2a0, 0xbc4f2848, 0x3c721f2c, 0x3c8cd96e, 0xba25f740,
0xbbd8b2e8, 0xbb5a3650, 0xbc22d698, 0x3cd440fe, 0x3d1f4db9, 0x3d4323b9,
0x39689e00, 0xbd07b34e, 0xbccfa89a, 0xbb9e7b28, 0xbd494eaa, 0xbd385b07,
0xbbb5fa98, 0xbcbaf4d7, 0x3cc7dc46, 0xbcb7a5dd, 0xbb0a16b0, 0xbb51f160,
0xbd3c0b1a, 0xbc1142ec, 0xbd3f8dd5, 0xba843260, 0x3ca5cc22, 0xbd26a015,
0xbce361f0, 0xbc10a48c, 0x3c9f7b6e, 0x3c9287de, 0xbc81e2a4, 0xbd37b89b,
0x3d480471, 0xbd14a0eb, 0x3d234b61, 0xbc89835c, 0xbcbccc1d, 0xbd291efa,
0xbcf1d68d, 0xbbd96c40, 0xbcb922aa, 0x3c80bdfe, 0x3c7c8024, 0xbd105d62,
0x3d244d31, 0x3cbbe22a, 0xbcb32eb7, 0xbcd1cb73, 0x3d0e8799, 0xbb920a68,
0xbd2e2b60, 0x3cbdb9e2, 0xbcfa0777, 0xbd06be54, 0xbd24d3bb, 0x3d3683c3,
0x3ceffe3a, 0x3ccc9cca, 0x3c3e2b00, 0x3ca3238e, 0xbd37e2b0, 0x3d11c961,
0xbd4ae8a3, 0xbd486c65, 0xbb8237e8, 0x3d30f539, 0x3d14c629, 0xbd4193eb,
0x3d26de35, 0xbd25110b, 0xbd1cc35a, 0x3c810422, 0x3d3cb60d, 0x3d48e591,
0xbd044924, 0x39545e00, 0x3d09ce5f, 0x3cef5336, 0xbb5d5b50, 0xbd037c0c,
0xbcb4b237, 0x3d4a11b9, 0xbcf4825a, 0xbd168eca, 0xbd2f5fad, 0xbba23d80,
0x3ceb122e, 0x3b070ed0, 0x3c4e9b4c, 0x3c580244, 0xbd461647, 0xbbc52830,
0x3d2c6e15, 0xbc8c15cc, 0xbd0d8fd2, 0x3be4a1f0, 0xbc210068, 0x3ca9a456,
0x3cc74eba, 0xbd1a8588, 0xbc784c48, 0x3c8cfe52, 0x3d2dafa9, 0xbc666754,
0x3cbad202, 0xbbdb5b28, 0x3c49e0f8, 0xbd3035cf, 0x3cc6bd0e, 0x3d17fb77,
0x3b60c620, 0xbd34bfc3, 0x3cdd6aa6, 0xbd1da1de, 0xbd1d27b2, 0x3ba27e28,
0x3cde5c2a, 0xbd4c18b2, 0xbcbcc0fd, 0x3b6fb6e0, 0xbc227260, 0x3cc3e3e2,
0x3cda3926, 0x3c0f5880, 0x3d452a2f, 0xbcca98d0, 0xbd462d60, 0xbd0ba370,
0x3cd64fb2, 0xbd4a8e37, 0xbd05dfee, 0xbc1a9bd4, 0xbd268438, 0xbcf40b2a,
0xbd4a88bd, 0x3c603f74, 0xbba3e3f0, 0xbbd827a8, 0x3c8485b2, 0xbd3ee2c2,
0xbd466335, 0x3c846b4a, 0xbd3703c0, 0xbd0ffab3, 0xbca240fd, 0x3ceacad2,
0x3c4fbdb4, 0x3c0c45c8, 0x3d05a8d5, 0xbc5c3f28, 0xbd3ea837, 0xbd129b55,
0x3cb3689a, 0x3d26abd1, 0x3d0cf0e3, 0xbcbe0683, 0x3ce1872a, 0xbc4cca28,
0xbc85cbca, 0xbb3e8460, 0xbd0e79e3, 0x3c89b682, 0x3d382369, 0xbd0e41a0,
0x3c99454a, 0xbad781c0, 0xbc811614, 0xbd37d59f, 0xbcc4fdb3, 0x3b3baa60,
0x3d470b9b, 0xbcb15893, 0xbd2e08ef, 0xbcab4813, 0xbbdd75e8, 0x3d092ff3,
0x3d091ac5, 0xbcbe0f03, 0x3d009871, 0xbd1deac2, 0x3d47da6f, 0xbc7323f8,
0x3ce8096e, 0xbcc2410d, 0xbcffbc97, 0xbbbd9830, 0x3d459729, 0xbc136060,
0xbd0330e4, 0xbce041ed, 0x3c98ac5a, 0xbd10a4b2, 0xbd3e3037, 0xbd206468,
0x3d34e981, 0x3c389ea0, 0xbd242522, 0xbcbe9850, 0xbcd60ee7, 0xbcfb070d,
0xbb028f80, 0xbbea97e8, 0xbbaa1f28, 0x3d18b097, 0xba530cc0, 0x3d1a05c9,
0xbd17b3ba, 0x3c81adf2, 0x3d21a6a3, 0xbd302f33, 0xbd28c162, 0xbc43e194,
0x3c277c58, 0xbcd14130, 0xbb89d3a8, 0xbc3f92d8, 0x3d3b5e07, 0x3bdde368,
0xbcec6d4d, 0xbbbdede8, 0xbabb21c0, 0x3cddbbd6, 0xbd25cc2e, 0xbc6c92c8,
0xbccb1030, 0xbcdc1163, 0x3cfb8c12, 0x3d3f2e85, 0xbd3707b8, 0x3c282b20,
0x3b7145d0, 0xbd115813, 0xbbc6f800, 0xbd103956, 0x3ba25528, 0xbd2697ab,
0x3cfb773a, 0x3d38ad2f, 0x3bf5df80, 0x3c631b0c, 0x3d46ce7d, 0xbc743eec,
0xbc589f8c, 0xbd3a9070, 0xbd2e9e9b, 0xbccaef27, 0xbcf61793, 0xbcfd47a0,
0xbd048d2d, 0x3c33edc8, 0xbca6d920, 0x3d16f5a3, 0x3bd1a650, 0xbc916a34,
0x3ca1a002, 0x3b86b698, 0x3cc09626, 0x3d382fdf, 0x3cd125ba, 0xbcc69920,
0x3bd58e18, 0xbb379360, 0x3ccf4b92, 0x3d3c2fd1, 0x3be5cd10, 0x3926e600,
0x3d1a42b1, 0x3c4412cc, 0xbc251cac, 0xbcba31ea, 0x3c98b6b2, 0xbbb536b0,
0x3c8b7ca6, 0x3cb01d82, 0x3cac849a, 0x3c575ec4, 0xbc6ff768, 0xbd43457b,
0x3bc20340, 0xbcfe39ba, 0xbd2dcad5, 0x3d1c6923, 0x3d20d2a9, 0x3ccd6d42,
0x3d140969, 0xbd47ea7f, 0xbc9d1967, 0xbad11440, 0x3d3fd6b3, 0x3d0406db,
0xbcd0d390, 0x3d0117c9, 0xbb4abfd0, 0x3ca4b0a2, 0x3d3c14df, 0xbcc52653,
0x3be00400, 0xbc633560, 0x3b9ba198, 0xbca1ecad, 0xbd148732, 0xbcf05240,
0x3d3c4535, 0xbd2df2a7, 0x3bdc7bf0, 0x3d1a9d01, 0x3b04afd0, 0xbcbf7093,
0x3d10cf11, 0xbd20fecb, 0x3c5a2294, 0x3cfaa8c2, 0x3d4544a1, 0xbb778fb0,
0x3bd6c468, 0x3c533e64, 0xbb03f380, 0xba8cc760, 0xbd1b780a, 0xbc33f834,
0x3ca93136, 0xbcee5fa7, 0x3d4824bd, 0xbc8c2364, 0xbc96c32c, 0x3b5274b0,
0xbd40acb2, 0xbb0aa3b0, 0x3c5e3a04, 0x3cb05e5a, 0xbbf5a490, 0xbd469270,
0xbcb1613d, 0x3c4d4104, 0x3d29fd19, 0xbd3ca957, 0xbd367eca, 0xbcf4b8b0,
0xbd4899d8, 0x3c4ad04c, 0x3cd504aa, 0xbd292aa0, 0xbc93fb1a, 0xb8927000,
0xbcb399bd, 0xbcb1882d, 0x3cdf1e82, 0xbd154a58, 0xbba65590, 0x3d223bf5,
0xba21a2c0, 0x3c9cadfe, 0xbccd19c3, 0xbd063e1e, 0x3d2fa8af, 0xbcaad777,
0xbd493cf5, 0xba19c780, 0x3cdf4afe, 0x3cf71c46, 0xbd0e8150, 0x3d2b94df,
0x3c9890e6, 0xbc875256, 0xbb92a798, 0xb8d05400, 0x3b83e610, 0xbcf30377,
0xbc970b7a, 0x3cb85f32, 0x3d0aeb31, 0xbd100dc5, 0xbd2ec743, 0xba81f1a0,
0xbcd2f36a, 0x3c8b8912, 0x3cd213ce, 0xbcd8505a, 0x3caf84ca, 0xbd1a1f43,
0xbd22fc05, 0xbc38fb40, 0x3c29ffa8, 0x3d21e4f9, 0x3d336049, 0xbc29fb14,
0x3d4c8f65, 0x3d0156b9, 0xbc9c1a63, 0x3bf1d810, 0x3d2f3379, 0xbcc6024d,
0xbd2b784e, 0x3cc61f72, 0x3bcad3e8, 0x3d1d16c7, 0x3c493368, 0x3d4a3853,
0x3d2f9a0f, 0xbd18cc55, 0x3ca27c92, 0xbc0e0578, 0x3d2f9f6b, 0x3d25c15f,
0xbccba443, 0x3d2861f9, 0x3cdd1c26, 0xb9bba980, 0x3c215ce8, 0xbc6fe358,
0xbd436fd3, 0xbc5fa958, 0xbcfd9ef3, 0xbc2e3d88, 0x3c9630be, 0xbd019f08,
0x3c552b0c, 0x3ccead72, 0x3d3161b5, 0xbd349167, 0x3cfb291a, 0x3baf3a70,
0xbd30eaef, 0x3d36d16d, 0xbbff9db0, 0xbd05cfe5, 0xbd46d333, 0x3a5d36c0,
0xbd2f322b, 0x3c6ea574, 0x3cc23a2a, 0xbd087a4d, 0x3c9e21b6, 0x3c8b4572,
0xbcfb10fd, 0x3d256731, 0x3ca1cd0e, 0xbd4060a8, 0x3c9c80e2, 0x3d0bb7b1,
0x3caec47a, 0xbca2cfaa, 0xbcd33083, 0xbbd930f0, 0x3d2a8e01, 0x3a034b80,
0x3c964966, 0x3d2e454f, 0xbd1daa35, 0x3d42e051, 0x3cb0dc8e, 0xbd03e9f0,
0x3ce23c82, 0x3d2b9c51, 0xbad26360, 0x3cf6b6c2, 0x3c5ccecc, 0x3d0d4d23,
0xbd2023dd, 0xbd080fdd, 0x3d27cddf, 0x3d4c3a39, 0x3c8303fa, 0x3cce2002,
0xbd420ceb, 0x3ce895e2, 0x3d1dd9a3, 0xbc269ba0, 0xbcce26cd, 0x3ce6a7ea,
0x3cbdf30e, 0xbd48fe87, 0x3c5c97a4, 0x3c961dfa, 0x3c323fb4, 0x3d1aa5ef,
0xbb308e50, 0x3d0699af, 0x3cbf1eb2, 0xbd0a3460, 0x3ba9a618, 0xbcdfe007,
0xbc13b634, 0xbc5bbbe0, 0x3d2a4e3f, 0xbcd5f22a, 0x3c76f9f4, 0xbc9b65cd,
0x3cb59b36, 0xbcaa9fd0, 0x3ccb71da, 0xbd38c728, 0x3cc6f0ca, 0xbd1d5c6a,
0x3d320255, 0xbd3a9ed5, 0x3b3d4930, 0xbd3aaa4d, 0x3c9e2a82, 0x3be26210,
0x3b52f560, 0x3cbaf15a, 0xbc9efa8a, 0xbd0726e6, 0xbd2c5ebd, 0xbd0af8a2,
0x3d26a0d7, 0x3cc926b6,
};
// 5
uint32_t bias_vals[] = {
0x3bded4d8, 0x3c9d39d2, 0x3ca89fd2, 0xbc5af538, 0xbcb69fcd,
};
// 3,1,1,5
uint32_t output_exp_vals[] = {
0x3c0f5041, 0xbd5feb0d, 0xbe2ac302, 0x3e4629df, 0xbf31fe38,
0x3e5c01b4, 0x3e7c96f6, 0xbce63e5a, 0x3e379fba, 0xbf3027ad,
0xbdb021b6, 0xbe97d08d, 0xbef57ffa, 0xbdfbe7fc, 0xbf1bf24c,
};
// 3,1,1,5
uint32_t output_relu_exp_vals[] = {
0x3c0f5041, 0x0, 0x0, 0x3e4629df, 0x0, 0x3e5c01b4, 0x3e7c96f6, 0x0,
0x3e379fba, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
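// Same medium-input, zero-strides, VALID-padding case as above, but with a
// clip value layered on top of ReLU: entries of output_exp_vals that exceed
// clip_value (0x3e4ccccd, i.e. 0.2f) appear as the clip value itself in
// output_relu_exp_vals, while negative entries are still zeroed.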
void test_valid_padding_zero_strides_medium_with_clip() {
input_set *set = &medium_input;
strides_input_set *strides = &zero_strides;
// 3,10,8,5
uint32_t input_vals[] = {
0x3f36b631, 0x3b9ca600, 0x3f76ac7f, 0x3d3f3570, 0x3e70e1b4, 0x3e3830f8,
0x3f3f9678, 0x3e1fb9dc, 0x3dd55dd0, 0x3aef4000, 0x3f433347, 0x3df08b90,
0x3eb33e84, 0x3dc553c8, 0x3ee7a2f2, 0x3e377d78, 0x3f3e7fdb, 0x3f1236d7,
0x3ea73b60, 0x3e0cc410, 0x3f57cffe, 0x3f1253af, 0x3eeea1d6, 0x3f116d9b,
0x3de6adb0, 0x3f335bd0, 0x3f459139, 0x3f221418, 0x3e089dac, 0x3f208980,
0x3ec148ac, 0x3dfe44a8, 0x3f434e54, 0x3f207099, 0x3f6d610b, 0x3f4929fb,
0x3f6fef6c, 0x3e96ad46, 0x3f4d17d4, 0x3e9b4166, 0x3eabb14a, 0x3b020c00,
0x3f09f77a, 0x3f68586f, 0x3ef184c6, 0x3f1b4509, 0x3df6bf78, 0x3f73cdae,
0x3f71564c, 0x3e0d0f10, 0x3f25afae, 0x3f4cc291, 0x3e5e4880, 0x3f544618,
0x3f165f1a, 0x3f0d128c, 0x3f0459a7, 0x3e24cc88, 0x3eb49e0a, 0x3f247e0f,
0x3f19b458, 0x3e607e7c, 0x3deeb150, 0x3ee86844, 0x3ed8b3e2, 0x3efaeafa,
0x3eca4cde, 0x3f4c835c, 0x3f71404c, 0x3f6162dc, 0x3eb60aea, 0x3d6169d0,
0x3d298250, 0x3f3facbb, 0x3dc7db50, 0x3f476996, 0x3d84cdb8, 0x3e8381c8,
0x3ebc1c56, 0x3edda720, 0x3f327cb8, 0x3ec91382, 0x3f567352, 0x3f60a3cf,
0x3e63cdd0, 0x3f2fbe4e, 0x3f61a525, 0x3e4d10dc, 0x3d8119d8, 0x3f706446,
0x3f566a2d, 0x3f263a00, 0x3f68176a, 0x3f6d8d15, 0x3f6c6af8, 0x3e741c84,
0x3f182317, 0x3e885446, 0x3f479de4, 0x3f7de85f, 0x3ec4857c, 0x3ebc271a,
0x3f143fe7, 0x3e0877c4, 0x3f75c402, 0x3ef8b408, 0x3f748088, 0x3f10c163,
0x3dbccff0, 0x3f32b1fc, 0x3f674b45, 0x3f0139d4, 0x3ef9b882, 0x3f64196c,
0x3defe888, 0x3dd093e8, 0x3f2b3835, 0x3f4fb95d, 0x3f7e460c, 0x3e24b3c8,
0x3e5a5130, 0x3f0208d2, 0x3f6cb694, 0x3f0b2b70, 0x3f1eaf41, 0x3ec30552,
0x3f13f950, 0x3e405394, 0x3e9bf8c4, 0x3d5b8a00, 0x3d954c60, 0x3f2aaf0d,
0x3f47c963, 0x3f48e285, 0x3f5108e1, 0x3dd6f800, 0x3f623988, 0x3f22332e,
0x3f39bf14, 0x3f37a5a6, 0x3f57aec5, 0x3f49ad86, 0x3efb3ac4, 0x3ee00cf8,
0x3f1e8a44, 0x3f05f896, 0x3f10619b, 0x3e1a016c, 0x3f667a9f, 0x3d823af0,
0x3dc83748, 0x3f332ca5, 0x3f25f83e, 0x3f52f762, 0x3f5d1d04, 0x3ed5bac2,
0x3eb30346, 0x3f5ce83e, 0x3f1fe917, 0x3ee4e7f6, 0x3ef79562, 0x3f577e28,
0x3d910f88, 0x3f427613, 0x3e94929e, 0x3ea4fce6, 0x3f23ee9a, 0x3d1c8fb0,
0x3d01a550, 0x3f42d47f, 0x3da77a08, 0x3f60995e, 0x3c0a9380, 0x3e88165e,
0x3f0ad115, 0x3f45a98a, 0x3ed80798, 0x3f1d936a, 0x3f0e07fa, 0x3e8e1ef2,
0x3ee0618c, 0x3dbbedc8, 0x3eb6713c, 0x3f22cf06, 0x3f1df573, 0x3f7cf87a,
0x3f19492d, 0x3eb07ab6, 0x3edd0088, 0x3e2a9198, 0x3defc080, 0x3f340f50,
0x3f773033, 0x3f3054c9, 0x3f76f70a, 0x3e66eb1c, 0x3f676d3a, 0x3ebeb408,
0x3f26fe18, 0x3f703110, 0x3ef2b336, 0x3f319c97, 0x3e3adb10, 0x3d1b3af0,
0x3f3b0802, 0x3e1b7498, 0x3f2f4afe, 0x3f0b8386, 0x3f285fa0, 0x3f43b72e,
0x3f5d486b, 0x3f65dcf2, 0x3e66fba4, 0x3eeb2f94, 0x3b42bd00, 0x3f62fc54,
0x3ea805d4, 0x3eb4fe52, 0x3e1fab68, 0x3e6362b8, 0x3f73f5a9, 0x3f7103c8,
0x3f439d03, 0x3f0451db, 0x3c9a8dc0, 0x3e3ccb48, 0x3f419ff7, 0x3f1d8aa7,
0x3ec10a20, 0x3e752480, 0x3f5b1273, 0x3f557bf6, 0x3e20a7b0, 0x3e701b88,
0x3daf0738, 0x3f58dfea, 0x3f5b49d0, 0x3f79dc64, 0x3d35c960, 0x3f5e3797,
0x3e9a238c, 0x3dbb67a8, 0x3f56999b, 0x3f6b147d, 0x3f450ebd, 0x3f0e0ba6,
0x3f42ab7d, 0x3e22fa34, 0x3e491070, 0x3f4b6181, 0x3f3cfea2, 0x3f773612,
0x3f37c12c, 0x3f11311c, 0x3e8e5a48, 0x3f5b9871, 0x3e184f28, 0x3f349900,
0x3f668a71, 0x3f618f59, 0x3d072ca0, 0x3eea0f1e, 0x3e109d28, 0x3edfbf9c,
0x3edb0210, 0x3f7f6eff, 0x3e478798, 0x3e182b74, 0x3e306d94, 0x3ebdac5e,
0x3ebfa2ce, 0x3ec22efa, 0x3db01a70, 0x3c854b20, 0x3f44eb5b, 0x3f342728,
0x3f131358, 0x3f390480, 0x3f154485, 0x3f54c400, 0x3e4200a0, 0x3f4477e4,
0x3f164d00, 0x3e7f1908, 0x3e10028c, 0x3debfcd8, 0x3eb5adde, 0x3f361860,
0x3f0955a3, 0x3f79dc1a, 0x3e254178, 0x3cd96800, 0x3e8f8ee8, 0x3f28e2cb,
0x3ee6297c, 0x3f4b6ffb, 0x3f451d83, 0x3f603f0b, 0x3eb77766, 0x3f22176f,
0x3e81b948, 0x3f47cfda, 0x3f10581d, 0x3f56ca4d, 0x3b0bb400, 0x3f4a6862,
0x3ed19728, 0x3dceec28, 0x3edeafc6, 0x3f0e3fea, 0x3f26b23b, 0x3f6d93ef,
0x3ebf908a, 0x3e99d570, 0x3e814d2c, 0x3f32ea59, 0x3f62acaf, 0x3f4152b9,
0x3e2d777c, 0x3e3f6a7c, 0x3ec70f36, 0x3ec7937c, 0x3f1c251e, 0x3f26d5de,
0x3e951d02, 0x3eaf0570, 0x3f04f161, 0x3ed8d624, 0x3e437414, 0x3efeceba,
0x3e8838fe, 0x3f168c18, 0x3e1a49a8, 0x3f1ae81d, 0x3eb71206, 0x3e61cee8,
0x3ecdd5a4, 0x3e418040, 0x3edfb0b6, 0x3e38040c, 0x3f76051f, 0x3ec0ee50,
0x3f6e6777, 0x3f57f7f9, 0x3f5ba562, 0x3ef21660, 0x3e8c7112, 0x3e5ab544,
0x3e827f6a, 0x3f261474, 0x3e9237d6, 0x3e5cd520, 0x3f5ab4cb, 0x3e81e382,
0x3d0e0a40, 0x3d0e87e0, 0x3f3fecc7, 0x3eaa5012, 0x3edeea7a, 0x3f1ad9aa,
0x3f059183, 0x3eb3fbb8, 0x3f014789, 0x3e4747a0, 0x3f1f716e, 0x3b714000,
0x3e852fde, 0x3c56b680, 0x3f30cc9a, 0x3f04b414, 0x3efcfc4a, 0x3f70c129,
0x3eacc898, 0x3ea23a38, 0x3f1f14e9, 0x3f50d1f7, 0x3f147713, 0x3f6be6c0,
0x3f3483e7, 0x3eea910a, 0x3e591a9c, 0x3f5c5ad6, 0x3d514e70, 0x3f4a49a7,
0x3f37fad8, 0x3f46540c, 0x3f74a543, 0x3f40679c, 0x3f4abe61, 0x3e178ffc,
0x3eae56a8, 0x3edbefee, 0x3da237b0, 0x3ed100d2, 0x3e92d8a6, 0x3dec6e58,
0x3f1317ce, 0x3f12ab4d, 0x3f45daaf, 0x3f5fe2b6, 0x3e42cb94, 0x3f7a260c,
0x3f0d94d4, 0x3e2f4678, 0x3f5da986, 0x3f3ecd03, 0x3f6e32c6, 0x3eb08d0e,
0x3ea099be, 0x3e11011c, 0x3f766f53, 0x3e6611c4, 0x3f5d2b28, 0x3ee70ede,
0x3f658ee8, 0x3d2f4970, 0x3f1d4a31, 0x3d9109c0, 0x3ce695e0, 0x3f7f63bf,
0x3e8fd860, 0x3e3e820c, 0x3f0d6155, 0x3f032f81, 0x3f10d55d, 0x3f6d3b5e,
0x3f02d3a0, 0x3f56b825, 0x3f146fa0, 0x3de117f0, 0x3ef0853c, 0x3f6a0c73,
0x3de4ad70, 0x3e9a66fc, 0x3f18cfc5, 0x3f1944d9, 0x3f3f06b1, 0x3ef8a9a6,
0x3f298468, 0x3ecc4c4c, 0x3f123787, 0x3d1cee30, 0x3f3b25da, 0x3f6fd971,
0x3f5c07c3, 0x3f34bfaa, 0x3e91d31a, 0x3f33a6b7, 0x3ed4457e, 0x3ca0a0e0,
0x3e968518, 0x3da00258, 0x3f0f0d86, 0x3e6f51dc, 0x3f20c172, 0x3f109e04,
0x3f3c4c11, 0x3f33be00, 0x3d69d2d0, 0x3efcff56, 0x3f5730f0, 0x3edf8088,
0x3e1b1620, 0x3e0a22dc, 0x3f0a1397, 0x3eea6736, 0x3e09b75c, 0x3f6a9bfc,
0x3e64d1b8, 0x3f580cd8, 0x3d290cc0, 0x3eae325e, 0x3f02dad5, 0x3e057dd8,
0x3f024ff2, 0x3f2c3e6f, 0x3f2a014a, 0x3f52bd7b, 0x3e9b5a80, 0x3dc798e8,
0x3ebea594, 0x3f0bd9a6, 0x3f1c50ef, 0x3f468365, 0x3e5554cc, 0x3ec9fdf8,
0x3f781a74, 0x3ee3854a, 0x3f280dd5, 0x3e997f22, 0x3f42c1ab, 0x3efc99ea,
0x3f36d3c3, 0x3f735e96, 0x3f0da6b8, 0x3f166eb5, 0x3f284366, 0x3f7982ae,
0x3f7f159b, 0x3f31652b, 0x3e4a43b8, 0x3eb27360, 0x3f7e42e8, 0x3ea3d00a,
0x3ed03742, 0x3e89dd14, 0x3f0f4624, 0x3da20d48, 0x3e9bbeda, 0x3e9af660,
0x3ed915e6, 0x3f0495dd, 0x3f07c35c, 0x3f118423, 0x3f382ace, 0x3ec0c056,
0x3f6df319, 0x3e1030fc, 0x3dd09b68, 0x3c62da80, 0x3ec9ce0c, 0x3f35a223,
0x3f4a9439, 0x3f459f23, 0x3f5971aa, 0x3e220b1c, 0x3e5c371c, 0x3f5be3f7,
0x3ed1ecf8, 0x3ee23da0, 0x3f5cba00, 0x3d643c00, 0x3e187990, 0x3da69340,
0x3f5910ef, 0x3f426233, 0x3cc6e800, 0x3ef1344e, 0x3dfae618, 0x3f60b7f5,
0x3f1942a2, 0x3ea30810, 0x3f5892c1, 0x3e1f3270, 0x3f0dac81, 0x3f1cfb60,
0x3f108bc3, 0x3f54a5b3, 0x3f232e5b, 0x3f287d6a, 0x3f0efc17, 0x3f786b06,
0x3dfe15e8, 0x3f621efb, 0x3e5e68d4, 0x3f5fb5c0, 0x3ee6d8ca, 0x3f55f16d,
0x3f656889, 0x3f5bb5c0, 0x3f1cf6c7, 0x3f320bfc, 0x3ed2ad8a, 0x3eb98256,
0x3f4aeaf3, 0x3f137f9e, 0x3f410d87, 0x3d991dd0, 0x3e569a0c, 0x3ed7cafe,
0x3e668c48, 0x3def9d98, 0x3f197f5b, 0x3e8e60d6, 0x3f31e438, 0x3f2ab320,
0x3db0ee68, 0x3f3ba15f, 0x3e223e2c, 0x3ef1a91e, 0x3d040020, 0x3f308052,
0x3f4cc3a8, 0x3ebdff66, 0x3f640150, 0x3f285ea6, 0x3f3ba978, 0x3e23e124,
0x3d465d60, 0x3f34b0c8, 0x3d886860, 0x3e88cc22, 0x3e0d2ad4, 0x3f446a6a,
0x3f5d5169, 0x3e960c80, 0x3f140f03, 0x3e27fde0, 0x3cf0dd40, 0x3d3a6940,
0x3f43937e, 0x3c2b1f80, 0x3f488a07, 0x3e418210, 0x3f7cb6bd, 0x3e4f0bf0,
0x3f019d08, 0x3d810b18, 0x3f55e69d, 0x3f2fb0bf, 0x3f65ec48, 0x3f1f602f,
0x3efdd75e, 0x3e0104f0, 0x3f3e3b36, 0x3e651c0c, 0x3f01362b, 0x3eb4c58a,
0x3f1e99ac, 0x3f1c85ab, 0x3f793d8d, 0x3ed75d7a, 0x3f37e2a9, 0x3f5fe002,
0x3f541199, 0x3f27737e, 0x3f354703, 0x3f19231c, 0x3f7e2bde, 0x3f788080,
0x3e910fb4, 0x3eb9a258, 0x3e7d73d8, 0x3f40c445, 0x3eea30f6, 0x3f4e1083,
0x3f5f484b, 0x3f165b7a, 0x3ebb4c5e, 0x3db7c988, 0x3d6a4e60, 0x3d8dffa8,
0x3f75cb55, 0x3f4924cc, 0x3f7589a7, 0x3e3ba718, 0x3e5b64c4, 0x3f15130b,
0x3ef78d22, 0x3eca0304, 0x3e3f3a18, 0x3da92190, 0x3e812406, 0x3eb1109e,
0x3e84898c, 0x3f10d994, 0x3d43c8a0, 0x3f044912, 0x3f006ab8, 0x3f4ecb83,
0x3f0c933a, 0x3f5ee4ab, 0x3d297a30, 0x3f1cb629, 0x3f476f1e, 0x3d8f4010,
0x3ec0a59e, 0x3e3780b8, 0x3f55d398, 0x3f11230d, 0x3f7b83d5, 0x3ddb9ed8,
0x3ed072d2, 0x3f35bad5, 0x3eefee28, 0x3effc15e, 0x3f1a8c66, 0x3e40f244,
0x3f3d1e68, 0x3ecf06e8, 0x3e6e97fc, 0x3df891f0, 0x3f6c646f, 0x3f132603,
0x3f755d4b, 0x3f030eb4, 0x3f069de3, 0x3f18d89b, 0x3ef31d78, 0x3f4dfd0b,
0x3e921c74, 0x3f3cd952, 0x3f632436, 0x3f35bf2e, 0x3f4c3a85, 0x3f23506e,
0x3ec9ddfa, 0x3ec5b4de, 0x3f1ec970, 0x3f519f70, 0x3f2e5652, 0x3f41cd3e,
0x3edc6592, 0x3cb093e0, 0x3e740a40, 0x3eb761fc, 0x3f1a4575, 0x3f204993,
0x3f14b1c6, 0x3f1ab9cd, 0x3f077650, 0x3dca4d18, 0x3eb3e18e, 0x3e8e4ffe,
0x3d8a25f0, 0x3f4c2939, 0x3f193155, 0x3ea8bc8e, 0x3e707ccc, 0x3f4a5d88,
0x3e32faac, 0x3f52839c, 0x3c1bd940, 0x3f318061, 0x3b900d80, 0x3ec6ea42,
0x3ec4a776, 0x3f606dab, 0x3f1a2caf, 0x399c3000, 0x3eda3bd4, 0x3f0c18e1,
0x3f73d7fe, 0x3f15ad0b, 0x3bbcb600, 0x3e2d4114, 0x3f105459, 0x3ba1d600,
0x3e868ece, 0x3f4eeef6, 0x3f0a6cdd, 0x3f0b01ca, 0x3eab4cdc, 0x3f3b253e,
0x3f0d522a, 0x3f04b44f, 0x3f214601, 0x3df97570, 0x3f781e88, 0x3f004a84,
0x3dc4e2c8, 0x3cae3f40, 0x3e981544, 0x3f0fa8be, 0x3f145be8, 0x3f1a41ed,
0x3f611d95, 0x3f6172a1, 0x3f349c83, 0x3e9f1dd2, 0x3f7a2bf5, 0x3f37d399,
0x3f44784a, 0x3d7b3b70, 0x3eb2431e, 0x3f441518, 0x3f0fce8a, 0x3eec22ee,
0x3f4c186b, 0x3e271ef0, 0x3e6a4590, 0x3f174a78, 0x3f36ab45, 0x3f2c7736,
0x3ec17b00, 0x3f752abc, 0x3dc62d48, 0x3f2639dc, 0x3f39aae7, 0x3e44f29c,
0x3e200de4, 0x3eb6e81a, 0x3f45e72e, 0x3f3560a9, 0x3f035e25, 0x3e7bad00,
0x3ef4dd66, 0x3dc91d00, 0x3ebcaa6c, 0x3f649206, 0x3f3875bd, 0x3f1e6d42,
0x3f2790d3, 0x3ef0a232, 0x3db17798, 0x3f65d44d, 0x3e672348, 0x3cd8b4e0,
0x3f7bcf6d, 0x3f3ef25d, 0x3e032ce8, 0x3e938888, 0x3ec79684, 0x3f2f7936,
0x3dda4f60, 0x3e95eede, 0x3f349424, 0x3d9caff0, 0x3f1c2c7e, 0x3f3398e5,
0x3f190799, 0x3e05dd44, 0x3edc72e0, 0x3f3675c1, 0x3f5e5a3c, 0x3f16064c,
0x3f662418, 0x3f3f4247, 0x3f0f3b48, 0x3f25daa2, 0x3f68f840, 0x3f6ffad9,
0x3c630280, 0x3f2b903a, 0x3f2c223d, 0x3effe77c, 0x3f2e2f6b, 0x3eecd0da,
0x3f342867, 0x3f5363b6, 0x3e9e7f68, 0x3f63ad4c, 0x3f3414bf, 0x3e6bcdb0,
0x3f578426, 0x3e962064, 0x3f4db122, 0x3e9c3af6, 0x3ef24892, 0x3f4c6ed5,
0x3f03c0b1, 0x3e9e3aaa, 0x3f0ba870, 0x3f134a31, 0x3f4793dc, 0x3e001670,
0x3d87d9c0, 0x3e9baaf2, 0x3ed92222, 0x3f22a2b0, 0x3ddd1fd0, 0x3f10d7f2,
0x3e288014, 0x3da30988, 0x3f4211a2, 0x3e92778e, 0x3f505e3f, 0x3f643df4,
0x3f0c2459, 0x3e20aef4, 0x3f0349c1, 0x3ee5cf92, 0x3e1474a0, 0x3f4e39d7,
0x3e349b04, 0x3e2639b0, 0x3f0ac524, 0x3efcffac, 0x3e6e2ee4, 0x3f226cb6,
0x3e5bbbc0, 0x3f5edf6b, 0x3f2aa1b4, 0x3f7dabe8, 0x3f13eb6d, 0x3f5b6432,
0x3f34d9a9, 0x3e61deac, 0x3f1d30f9, 0x3d778ee0, 0x3f764987, 0x3f5fb106,
0x3f3dcdc1, 0x3f367073, 0x3e0b0244, 0x3eafcfd0, 0x3f12cb91, 0x3dad9c30,
0x3e9d2594, 0x3dd0d1a0, 0x3d9178d0, 0x3f0e591f, 0x3f7a3f1c, 0x3dba5ad0,
0x3f65a922, 0x3e9dfd50, 0x3ec2c3f4, 0x3f390185, 0x3ea0ba1a, 0x3f53afab,
0x3f33a93d, 0x3ed4cce4, 0x3edbc1d0, 0x3f03e91f, 0x3e728234, 0x3f298ab0,
0x3e6fe3bc, 0x3dac3250, 0x3e8b58f8, 0x3f135011, 0x3eb0a3dc, 0x3f5593ec,
0x3f03f287, 0x3f71fe15, 0x3ec51e3e, 0x3d744960, 0x3f74be06, 0x3f2d671d,
0x3e384d84, 0x3f6f4615, 0x3eb3061c, 0x3f2f80b2, 0x3e69ed90, 0x3e2727b4,
0x3d410e20, 0x3ec6c27e, 0x3f03a310, 0x3edfc2a4, 0x3ea53202, 0x3e0a79b4,
0x3f49b5df, 0x3eab7e3c, 0x3ae05e00, 0x3f72d5e0, 0x3dab7408, 0x3e810eaa,
0x3bcf8e00, 0x391bc000, 0x3e8af156, 0x3f4a7e45, 0x3f4cd234, 0x3ed098fe,
0x3ee4327e, 0x3f766ab7, 0x3e80075e, 0x3f11c187, 0x3e14872c, 0x3f6b1a7b,
0x3f3d8346, 0x3f4c3cf2, 0x3ed51848, 0x3e060280, 0x3ef3f384, 0x3ed91a44,
0x3f2b44a6, 0x3d5edd00, 0x3ecd4ecc, 0x3f282129, 0x3f4233de, 0x3e82de7e,
0x3f7e5ec6, 0x3ecda3b2, 0x3e99ac3a, 0x3f3cda2e, 0x3ebb093e, 0x3ed259d4,
0x3f19d226, 0x3eac68c0, 0x3e7b6568, 0x3e9866b8, 0x3f508994, 0x3e0ce474,
0x3ee3dc62, 0x3ecb4bcc, 0x3f387866, 0x3e772898, 0x3f5bfd02, 0x3e7d2af0,
0x3f7bfbe4, 0x3e9955f4, 0x3c566100, 0x3d376530, 0x3f4e5334, 0x3f496291,
0x3f2b7072, 0x3dcfc480, 0x3e86c298, 0x3f4fd219, 0x3e82f6e0, 0x3f0924f0,
0x3e799b88, 0x3e9ecf24, 0x3de3f120, 0x3ebb2a18, 0x3dce3a20, 0x3eae78de,
0x3f07e440, 0x3f274293, 0x3f47be34, 0x3e3e2694, 0x3edbd970, 0x3f34e9a9,
0x3e963ec0, 0x3f40fb2f, 0x3e1cfe1c, 0x3f6ac0fb, 0x3ebc892c, 0x3e5993d4,
0x3f0a2574, 0x3f431243, 0x3d1f36f0, 0x3f13fa11, 0x3ee0c4b6, 0x3e368e9c,
0x3f1bbc36, 0x3f3fe30e, 0x3ec36554, 0x3eb09934, 0x3e065dc0, 0x3ef9902a,
0x3f6bb3ef, 0x3e9b5008, 0x3e5c78e0, 0x3ec9f760, 0x3e4f2254, 0x3f00d2e7,
0x3edc3108, 0x3f12dd38, 0x3f2f0fb7, 0x3b969c80, 0x3e40b1fc, 0x3f19c592,
0x3f490f1c, 0x3ee4b532, 0x3ea567b4, 0x3f04180f, 0x3c6ab140, 0x3ed87f5a,
0x3da7da00, 0x3df04a10, 0x3f1e5dd8, 0x3e1ba038, 0x3eee17ae, 0x3ec14278,
0x3b2edc00, 0x3f38a28e, 0x3f64000a, 0x3f09a1d2, 0x3f7fc3e6, 0x3cbd65e0,
0x3eb22f14, 0x3f39e132, 0x3e91fa0a, 0x3db1a4f0, 0x3e319080, 0x3ea825ec,
0x3ef927ba, 0x3f43c3c1, 0x3de54748, 0x3c643940, 0x3e873a08, 0x3f040ad3,
0x3f761a55, 0x3f5076ef, 0x3f27e6a1, 0x3c864a80, 0x3f11ab22, 0x3ee89a44,
0x3f7f5c64, 0x3f2fe4e3, 0x3f2cf469, 0x3f1dc003, 0x3f08b381, 0x3c9bccc0,
0x3f3397ba, 0x3df9d740, 0x3e227020, 0x3f67225d, 0x3d43b5c0, 0x3b13a000,
0x3e850468, 0x3eba37b4, 0x3f68d6ed, 0x3eaff3d4, 0x3f4f2667, 0x3efd5d96,
0x3d286520, 0x3e031f88, 0x3f7f9ed7, 0x3f18f491, 0x3f62acfc, 0x3ddaf220,
0x3e89721c, 0x3e5d693c, 0x3f417389, 0x3f71cf35, 0x3ecfd50e, 0x3c1c3a80,
0x3f0af1a6, 0x3ec114c6, 0x3e0b9774, 0x3ede32d4, 0x3f4e661e, 0x3f687d66,
0x3f3d77ac, 0x3f6f0343, 0x3f641d5d, 0x3f63e789, 0x3ea55fac, 0x3cd34ac0,
0x3f72a8f4, 0x3f160c23, 0x3ea2d36c, 0x3eb17e60, 0x3f2b9905, 0x3f67672c,
0x3ed67d9e, 0x3f290f1b, 0x3e28541c, 0x3f657fd6, 0x3f0b24da, 0x3ef8e85c,
0x3f1f0187, 0x3f04ea25, 0x3f35ce89, 0x3f66cd7c, 0x3f477cad, 0x3d8efe60,
0x3f7778f2, 0x3ceb0ec0, 0x3f3e30db, 0x3eb27386, 0x3f331d5d, 0x3f345742,
0x3eb82282, 0x3e109c08, 0x3ed02474, 0x3f3ecea4, 0x3f59f84d, 0x3ee0357a,
0x3e7222f0, 0x3ecca506, 0x3f02d0a6, 0x3eef0a28, 0x3f4310eb, 0x3f026842,
0x3dc64630, 0x3f0d205c, 0x3ed7565c, 0x3e9f210a, 0x3e1527f4, 0x3edc3884,
0x3f1dbe71, 0x3ed243ba, 0x3f1268ea, 0x3eabb42e, 0x3d1abe50, 0x3f5c3655,
0x3f490120, 0x3ea62188, 0x3f6732ed, 0x3e6cca60, 0x3f7118d7, 0x3ede6e68,
0x3ed7550e, 0x3f3b981c, 0x3e3a1adc, 0x3f1265a5, 0x3d954db8, 0x3efae76c,
0x3f6f78f9, 0x3f5f59bb, 0x3f368f94, 0x3ea61b26, 0x3e5a004c, 0x3f00fb35,
0x3d216a30, 0x3ebd061a, 0x3f3217ec, 0x3e1c9fc8, 0x3e7aee48, 0x3f7b5503,
0x3f4c506d, 0x3f7ccbec, 0x3f69ab33, 0x3f54b76f, 0x3f2ffe47, 0x3e9b4d50,
0x3ed17370, 0x3f6b8ba6, 0x3f096fbb, 0x3e25fe6c, 0x3db3ed90, 0x3d8470b0,
0x3ef83fea, 0x3f158e41, 0x3f6a0bf2, 0x3f3c0de9, 0x3e070158, 0x3f6ecaf7,
};
// 10,8,5,5
uint32_t kernel_vals[] = {
0x3d3114e7, 0x3d4407ad, 0xbd35d912, 0x3ca7c94a, 0xbc56a7e8, 0x3b948e90,
0xbccbb9a0, 0x3c2b9b28, 0x3ca02e4e, 0xbcceb383, 0x3c6a04fc, 0xbd37c660,
0xb799d800, 0xbc5c8848, 0xbc4ae274, 0xbcf0a620, 0x3cb33d9e, 0x3d261659,
0x3cc7aeb6, 0x3d326067, 0x3c9c9e26, 0xbbcc0050, 0x3cd0ac2a, 0xbc893ff4,
0x3b8b1050, 0xba428000, 0xbd315ffa, 0xbd0f4ef5, 0x3bbcf490, 0xbc2ab878,
0x3bc68180, 0xbbc9bb68, 0x3cd18a86, 0x3c96670e, 0x3c22f178, 0xbca5d14a,
0xbca34e20, 0x3c69da2c, 0x3c012fc8, 0xbc4e8c78, 0x3c6c85a4, 0xbc8a1926,
0xbc54d694, 0xbd031dd0, 0xbc5f05c0, 0xbbdf5d98, 0x3cfff456, 0xbc9b11c7,
0xbd0435ce, 0xbd0479da, 0xbb11a930, 0xbd09e01a, 0xbcae6513, 0x3c897392,
0xbd33a047, 0xbc90b650, 0xbbfc8990, 0x3c8228ee, 0xbca793ea, 0xbd149155,
0xba0b0b40, 0x3cf9af0e, 0xbd20aafd, 0x3b9c4c68, 0xbd08876d, 0x3c3bf5c0,
0xbc85b67a, 0x3c955286, 0x3c4ab648, 0xbca8e4b7, 0x3c4cdf44, 0xbccb04c3,
0x3c22b794, 0xbd0e93a0, 0x3d2b04dd, 0xbc6033f4, 0xbccbc0f7, 0xbd0e3688,
0xbc4bfcd8, 0xbd37700a, 0xbd4b06a7, 0x3c0ceeec, 0xbbdb7928, 0x3c47f720,
0x3d3832a9, 0x3bd083d8, 0xbd420c63, 0xbd20b7cd, 0x3d284029, 0xbd2f3a1d,
0x3cdc94ea, 0x3cc68052, 0xbc0ab8e0, 0x394d6e00, 0xbd1fc3aa, 0x3c4e2404,
0x3d0adb4d, 0x3c6f5e74, 0x3d373d99, 0xbcd89817, 0xbc582354, 0xbb25eea0,
0xbd33a903, 0xbcc14be7, 0x3b5d7630, 0xbc550a98, 0xbd280dfd, 0xbd412b6f,
0xbcda4e57, 0xbb931290, 0xbcd13840, 0xbd378128, 0xbb4bacb0, 0xbc816b44,
0x3cc4982e, 0xbbf372f0, 0xbc1ece18, 0xbc8989d0, 0x3d2dbdf9, 0xbd2d3ab0,
0x3d4754e3, 0x3c4187f8, 0xbcbd2fdd, 0x3c945352, 0x3d080845, 0x3b240150,
0x3c131a98, 0x3b7fc8a0, 0x3d282079, 0x3c047518, 0x3c9ccfca, 0x3d252367,
0x3d14eb05, 0x3d2ee1b1, 0xbc832ce6, 0xbb9290b0, 0x3ced2af6, 0xbbcd5880,
0xbd237b88, 0xbc38d38c, 0x3cd2775a, 0x3c209b68, 0xbcc059b3, 0xbc2d7688,
0x3c3664a8, 0xbd444938, 0x3bb62998, 0x3cfce4ea, 0xbd2647d2, 0x3c4f8f54,
0xbcc7f663, 0xbc706940, 0x3cf03666, 0x3c894e02, 0x3cdd4b22, 0x3d3058e5,
0xbd178a16, 0xbd33a122, 0xbcaf84fa, 0x3d2b357f, 0xbbcc8510, 0xbcf1e24d,
0x3d1811bb, 0x3d07983b, 0x3d00c77d, 0xbd367605, 0xbd4672e3, 0x3d0419c7,
0x39b31800, 0xbd492abb, 0xbc9b6eea, 0x3be18d70, 0xbd41a34a, 0xbcfcf530,
0x3cfcab42, 0xbd3e81a2, 0xbd421e7f, 0xbcc11efd, 0xbca63d6d, 0xbd331545,
0xbd38f0bd, 0x3d496ed7, 0xbc17b734, 0x3c3b45f4, 0x3c64196c, 0xbd417f67,
0x3d15ae6f, 0x3d14b5f5, 0x3c64e8bc, 0x3b57aae0, 0x3c5c3774, 0xbcca7973,
0xbcded7b3, 0xbcb2267d, 0x3ca850b6, 0xbd09ca34, 0xbcfc9c53, 0xbc99dc4d,
0xbd2dda8b, 0xbd104bc0, 0xbcd2fcc7, 0xbbbd1f80, 0x3ba3d618, 0x3b924eb0,
0x3c0f8a6c, 0x3cc38ea2, 0xbca04520, 0x3b4b43d0, 0xbc6d4e08, 0x3c1c136c,
0x3d0ad6ab, 0x3c7f40fc, 0x3d0add39, 0x3d06e91b, 0xb8853000, 0x3d46d18b,
0x3c98251a, 0xbc107654, 0xbc49e4ec, 0xbc4a6e8c, 0xbcc6af4d, 0x3d181b39,
0xbcf100ed, 0x3bed0c00, 0xbacbcf40, 0xbc2304c0, 0x3d1b6291, 0xba2194c0,
0xbc3212ec, 0xbbecaeb0, 0xbd425452, 0xbcb6dac3, 0xbc86e604, 0x3cccd70a,
0xbcc3d7aa, 0xbba5a570, 0x3c4da1fc, 0xbcbb9c3d, 0xbcf26c8d, 0xbd38e4c7,
0xbd4ab0b3, 0xbb218ae0, 0x3cce9f6e, 0x3c6a84a4, 0x3c8fbf5a, 0x3c20d718,
0x3cd7200a, 0xbcf3275d, 0xbca6530a, 0x3cd43cfe, 0x3d1aa751, 0x3d1daee3,
0x3cbf75f2, 0xbb8c1c50, 0x3cf04506, 0xbd43d9c2, 0xbbe133c0, 0xbc95d02a,
0x3a580cc0, 0x3d433091, 0xbd310a97, 0x3d22b219, 0xbd20c68d, 0xbcf093a3,
0x3a90b0c0, 0xbcd4a277, 0xbcc4ea5d, 0x3ba52110, 0xbd4584b0, 0xbc4892e0,
0x3cf9cef2, 0xbd202d7b, 0xbcf8329d, 0xb9317b00, 0xbb02cb60, 0x3d16a987,
0x3ccd0ae2, 0xbd0e07bb, 0x3ce5afe2, 0xbcba3e53, 0xbd004140, 0x3c727284,
0xbd3100aa, 0x3ce1384a, 0xbc7980ac, 0x3d220849, 0xbd3db48b, 0xbd401a28,
0xbca574ea, 0xbc3922f4, 0x3d031b4f, 0xbd32a3f0, 0xbd2c5190, 0x3d1b5ce1,
0x3c8da5b2, 0xbd1adf65, 0xbd3eaf7f, 0xbd40fb2d, 0xbc019894, 0xba3c1140,
0xbcf569ad, 0x3bede0a8, 0x3b1b9230, 0xbd23010b, 0x3c740fcc, 0xbbd867c0,
0xbc17c908, 0x3b348ca0, 0xbc5dd360, 0x3d2a569d, 0xbcdc6527, 0x3d15f95b,
0x3c943d1a, 0x3b68f8d0, 0xbce9bb5a, 0xbc0014b4, 0x3d0229a5, 0xbd4ba5e0,
0x3d13459b, 0xbab304c0, 0x3d053451, 0xbc52e2cc, 0x3c0c96a8, 0xbd334520,
0x3cc7999a, 0xbafba400, 0x3c4b8ce8, 0x3d3f28c9, 0x3d3959cd, 0x3ca50e6e,
0x3c64cc2c, 0xbd4c667b, 0xbbba0840, 0xbcf05baa, 0xbb70df60, 0x3c910432,
0x3c84d512, 0xbd388aaa, 0x3c8acbf6, 0xbc3d9808, 0xbcda55a7, 0xbc24b518,
0xbcc722f0, 0x3cad76be, 0x3c70c6dc, 0x3d2b11e3, 0x3d080f31, 0xbc220d2c,
0xbd3703ba, 0xbd191162, 0xbc6c6f40, 0xbd1de1dd, 0x3d1235e5, 0x3d09d783,
0x3ccdc1ee, 0xbd1bc0b0, 0x3d100d91, 0x3d328b8f, 0x3c9d09ae, 0x3ccd7882,
0x3d4b1a4d, 0xbd093d0c, 0xbd4c717f, 0xbceb60ea, 0x3b2b4ea0, 0x3cf9e1ea,
0xbd493907, 0x3d3ce3f1, 0x3d195011, 0xbca6a497, 0xbcc9e50d, 0xbcc9a8b7,
0xbd2c719d, 0xbd1ed948, 0xbc243d94, 0xbcdb1f83, 0x3ca5dcfe, 0xbd4afb10,
0x38343400, 0xbc8c7d06, 0x3d1dc93f, 0x3d4ada1d, 0xbc86d956, 0xbce683e3,
0x3d0fffe1, 0x3b17b100, 0x3c475238, 0xbccf00f3, 0xbb9a41d0, 0xbd1a502d,
0x3b5ba7d0, 0x3d45967d, 0xbd119e3b, 0xbc7f0188, 0xbd0cdef0, 0x3c0efb68,
0x3d3dd0f3, 0xb7ac8000, 0xbcab8b77, 0x3cba91c6, 0xbc100de0, 0xbd4bd305,
0xbbf6a4d8, 0xbca78a53, 0x3c83d052, 0x3d393393, 0x3ccea7ae, 0x3d1e4b01,
0xbd2825a6, 0xbd18795e, 0x3c6bafd4, 0xbc644f88, 0xbd2ce9d7, 0xbc0d95d4,
0x3c083834, 0x3b0057b0, 0x3cc75282, 0x3ce1beba, 0x3c3a97ec, 0x3bd0a898,
0xbcd2478a, 0xbccdefdd, 0xbc0876a8, 0x3bfed400, 0x3cc8e346, 0xbc8e1f0a,
0xbca92707, 0x39f45d00, 0x3c270728, 0xbc208c78, 0x3b499c00, 0x3d4866f5,
0x3b1b1fb0, 0x3c9e40d2, 0xbd087ff6, 0x3ca2bef2, 0xbca468d3, 0xbca16b1a,
0x3d3addf5, 0x3d0e80bf, 0xbc78d1ac, 0xbcf4ff6d, 0x3d12995b, 0x3b26b4d0,
0xbd02b830, 0x3c2f7634, 0xbd38ff10, 0x3ca8f88e, 0xbcc0a01a, 0x3d3e36f3,
0x3ce4f236, 0xbc57488c, 0xbc873f94, 0xbd078f10, 0x3c5c97fc, 0x3d26b433,
0x3c5f45f4, 0xbcb806a7, 0xbcf658aa, 0xbd4a8470, 0x3d1ac939, 0xbbb171c0,
0xbd00ee5e, 0xbc93b7e4, 0x3c21d4a8, 0x3d1a4def, 0xbd15782e, 0xbca9c733,
0xbd0d9e3b, 0xbcfdea43, 0xbcbde660, 0x3cb42d8e, 0xbd206ac0, 0xbae99a00,
0xbc220d0c, 0xbccb22e0, 0x3d166429, 0xbd068cfd, 0x3d05072b, 0xbcfbdd43,
0xbcb96ea7, 0xbb806270, 0xbc42d22c, 0xbc99f550, 0x3d13b6ef, 0xbc7b5968,
0xbcc11cb0, 0xbcd22397, 0x3d467733, 0x3d437e0f, 0x3ce33436, 0x3d45e69f,
0xbcb4e1d3, 0xbc9d780d, 0xbd44eddb, 0xbc9f8fca, 0xbcf78a10, 0xbc667634,
0xbbc440b0, 0x3c4219ac, 0x3bfc1290, 0xbabf0aa0, 0xbd0e8156, 0xbcd89f10,
0xbd22bc6a, 0xbca2091d, 0xbd231f4b, 0xbbb9ed70, 0xbc4c8ce8, 0x3d302005,
0xbce67d5d, 0x3d3315ab, 0x3d42b557, 0xbcfb3853, 0x3cbf22fa, 0x3c12c0b8,
0x395ae800, 0xbd13572e, 0xbc916986, 0xbc828f20, 0xbd0918b5, 0xbc012328,
0x3c289e98, 0x3d3b4c3b, 0xbcc988c0, 0xbce724a7, 0xbcba939d, 0x3d081539,
0x3c1c8748, 0xbd27860b, 0xbbd36d68, 0xbd32ff08, 0x3a07c480, 0x3b68ad60,
0xbc95b244, 0xbb803750, 0x3d304595, 0xbc1a6028, 0xbca8c7c3, 0xbd2183eb,
0x3bfa09e8, 0xbcf657b7, 0x3bff8f70, 0xbc4a8ccc, 0xbd08d850, 0xbd2ac862,
0xbc7f8300, 0x3cad9fc2, 0xbcbab96d, 0xbc097d78, 0xbc7fad2c, 0x3c0f1f14,
0xbc849b46, 0xbd497d13, 0xbd00be2c, 0x3bb30530, 0xbd0d0112, 0xbc06f720,
0xbc8ddc4c, 0xbcc89d13, 0x3d202a01, 0xbbaec7d8, 0x3d29e3b7, 0xbd1a09f5,
0xbca13973, 0x3cd3cd26, 0x3cebb3f6, 0xbbe50af0, 0xbd35d98f, 0x3d1f7d17,
0x3d236eef, 0xbb822f98, 0x3b77e3b0, 0x3d406aa1, 0xbccda04d, 0x3d213933,
0xbd29efdd, 0xbb52e030, 0x3cc425a6, 0xbcad5aa3, 0xbd0edd9d, 0xbc4fd994,
0x3c731dd4, 0xbc936a74, 0x3c092048, 0x3b8cdf68, 0xbd359ca3, 0x3a916860,
0x3d16e051, 0xbc452278, 0x3cff2f52, 0xbc2aa378, 0x3b1f33e0, 0xbd1008a5,
0x3d1396bd, 0xbbcee730, 0xbd32750b, 0x3c5e0074, 0xbd1d38e3, 0x3d17c565,
0xbcc91663, 0xbc58e3a8, 0x3c7060e4, 0x3d0aa399, 0x3bf3e110, 0xbd23fdb5,
0x3cee8352, 0x3d28a7f7, 0xbc4de580, 0x3cdc852a, 0x3d0e4c21, 0xbb4875b0,
0xbbd2b018, 0xbd0cd62a, 0x3c750ec4, 0xbca804bd, 0x3b607880, 0x3cb1ab1a,
0xbb9b9640, 0x3c425e0c, 0xbab5cfa0, 0xbd3bd7c3, 0x3d4cb99d, 0xbd2adf2d,
0x399faf00, 0xbb450930, 0x3c62e114, 0xbcfb6890, 0x3d19b807, 0xbc333088,
0x3ca1ce42, 0xbca8fe90, 0x3c00c3c8, 0x3d0f85ad, 0x3c4a3528, 0xb9c2e680,
0x3b587fe0, 0xbc6fd8e0, 0x3ca98c0a, 0xbabaeb40, 0xbbb38168, 0xbcd55fda,
0x3cabf766, 0xbbf93d10, 0x3d2666ab, 0xbccbd870, 0x3cb013da, 0xbc8de3f0,
0x3c853306, 0x3ca6a16e, 0x3d439811, 0xbb590460, 0x3b920898, 0x3b85bc10,
0xbce92ce7, 0x3c6c3284, 0xbafe8960, 0x3c945cc2, 0x3c754a7c, 0xbc2abab8,
0x3c7b58dc, 0x3d08e483, 0xbd126588, 0xbc968340, 0x3d24cd49, 0x3cb3d2da,
0xbd2d76eb, 0xbc813a44, 0xbd39e80d, 0x3cc53a6a, 0x3d0ebf09, 0xbbb9a7f0,
0x3d0b9495, 0xbcee629d, 0x3ce14c82, 0x3c8c3152, 0xbbac1070, 0x3cf3a29e,
0x3cf1d7da, 0x39dc3700, 0x3d485977, 0xba38fb80, 0x3cfcefb2, 0xbcc5326d,
0xbd0244a4, 0x3ae3e240, 0x3ad2db40, 0xbd248bd0, 0x3d4c15c9, 0x3bbe53a8,
0xbcc67bc0, 0xbd080328, 0x3b610de0, 0x3c2f094c, 0xbd40ed1d, 0xbcea71b3,
0xbcf7154d, 0x3d30698f, 0x3cd21802, 0x3c18a814, 0xbcd07c67, 0x3cfa565e,
0xbcef7d00, 0x3c8ba85e, 0xbc8159b0, 0xbca6ffcd, 0xbd05df9a, 0x3c309480,
0xbd0d905e, 0x3d2f28ab, 0x3ab1e760, 0x3c6e6cc4, 0x3d0dced9, 0x3be71b70,
0xbd01b3b6, 0x3d3f7f8b, 0xbbb3e6b0, 0x3c429918, 0x3cdf0662, 0xbba3ee28,
0xbca5aaed, 0xbaa6f360, 0xbd352b5f, 0xbce29c30, 0x3bae5b50, 0xbcf5ecd3,
0xbd1b9263, 0x3c6e55fc, 0x3d095799, 0x3cfc7d6a, 0x3c90a572, 0xbab16840,
0x3cbcd04a, 0x3a97d940, 0xbd04a19c, 0xbd42e445, 0x3c595cd4, 0xbc7c71c0,
0xbd31da0d, 0xbc962a74, 0xbd0c49b0, 0xbd1443b5, 0x3a8b8060, 0x3d2a8f6d,
0xbc04f974, 0xbd1fdeb0, 0xbd3aed78, 0x3c4628e8, 0x3d2145d5, 0xbb6fd580,
0xbc8fa2da, 0xbcced14a, 0xbadfd860, 0x3ce723f6, 0xbd28aca5, 0xbca54a13,
0x3d45bed1, 0x3cd6db22, 0x3c8338ba, 0xbd45e5e7, 0xbd330b0d, 0xbce8685d,
0xbd47ad03, 0x3c0cfcc0, 0xbd2a62ba, 0x3cbd023a, 0x3d49da49, 0x3c23ee28,
0x3d2c5c47, 0xbcf8b1b0, 0xbd2c365b, 0x3c59734c, 0x3ce80486, 0x3d464e63,
0xbd2d7b1f, 0xbc804414, 0x3d463d95, 0x3ce1367a, 0xbd332f6f, 0xbc972fda,
0x3cca32e6, 0x3d23aff5, 0x3d3fb20d, 0xba892400, 0xbca38ac3, 0x3b883350,
0xbcfe11c7, 0x3d3bf377, 0x3bc73210, 0xbc61e0ac, 0xbd131c43, 0x3a0ddc80,
0xbca5ecbd, 0xbd0f1b78, 0x3c69512c, 0x3d35d1f1, 0x3cc28532, 0xbbff91c0,
0x3b51c780, 0x3c03fcc8, 0x3cb255a2, 0x3c230300, 0x3d0815e7, 0x3bacb8c0,
0xbd039c7a, 0xbb3584d0, 0x3d1bfac9, 0xbd3ae958, 0x3cefc6a2, 0x3c235ae8,
0x3ccab992, 0xbd370b4b, 0x3a732200, 0xbd461592, 0x3cc961f6, 0x3c838242,
0xbc9cced3, 0x3d27de81, 0xbc8344fc, 0xbc7faee8, 0xbd1e254b, 0x3d469e51,
0x3ce20ebe, 0x3c2f144c, 0xbc357d2c, 0xbc3620e8, 0xbc04a334, 0x3c5956a4,
0xbc8ba3c4, 0x3bca29e8, 0x3d17d1e3, 0xbba196e8, 0x3c8c295e, 0x3d2c4267,
0x3c983e9e, 0x3d09932f, 0xb9cddb00, 0xbd090ac2, 0x3c2467e0, 0x39fd2400,
0x3d0f0b43, 0x3ca1e1d6, 0xbba80d18, 0xbcc25020, 0xbcc3dcb0, 0xbbe231e8,
0xbd26d855, 0x3adee9c0, 0x3d3ef06f, 0xbd2c23e5, 0x3d2cba01, 0x3cd42aca,
0x3ac605a0, 0xbcc3951a, 0x3b32c4b0, 0x3ce38f9a, 0x3a6874c0, 0xbb147a00,
0x3c7019a4, 0x3c9e6102, 0x3b0e2d80, 0x3c7dbafc, 0xbd20fbd8, 0x3d436619,
0xbd434c55, 0x3bc58228, 0xbd3591bd, 0xbbd1a028, 0x3c163ff8, 0xba18cb80,
0xbc6d2034, 0xbbc6aaf0, 0x3d1be929, 0x3cf2d14e, 0x3d3ecf11, 0xbce0bd70,
0x3cf668b2, 0xbd304c52, 0x3d0f5a29, 0xbb3c8050, 0x3d2a76fd, 0x3cdfec42,
0xbc131ed4, 0x3c8715da, 0xbced47e0, 0x3caca7c2, 0xbb68ff00, 0xbd2bfced,
0x3c6bbf0c, 0xbd313687, 0xbba436a8, 0xbcd181d7, 0xbd37cf83, 0x3c5b8504,
0xbd082a58, 0x3c96080e, 0x3cde49b2, 0x3a8d1bc0, 0xbd32c9b7, 0xbbaeaad0,
0xbc80155c, 0xbc08e3a8, 0x3ca31582, 0xbbea7eb0, 0x3d4b33a9, 0x3cd27dda,
0xbc883e6c, 0xbc9deb03, 0x3ceda292, 0xbc9d334a, 0x3cab4f56, 0x3d46cadd,
0xbd339477, 0xb98b6900, 0x3c947fb6, 0x3d023c31, 0x3c99d8a2, 0xbd1473f8,
0x3c3642c8, 0x3d2980c5, 0x3c5b1c54, 0x3d3bb0f1, 0xbd031e18, 0xbad1c9a0,
0xbccc6d0a, 0x3c952096, 0xbcaa9d87, 0x3cf9b81e, 0x3bfe83a8, 0xbc9c417a,
0x3af637c0, 0xbca5ffc3, 0x3cf64072, 0xbc8c5214, 0xbcb6240d, 0xbd30cb48,
0xbc1c45cc, 0x3d3953f1, 0xbc29d26c, 0xbd33c0e5, 0xbd130e08, 0xbd2e02cb,
0x3acbdc60, 0x3cef5bae, 0x3d0197ed, 0xbd1cff72, 0xbd11b5a0, 0x3d1b8873,
0xbd38de4d, 0xbd476057, 0x3d239081, 0xbc05e78c, 0xbc94c6f0, 0x3d00f2b7,
0xbbeb7c68, 0x3d307db1, 0x3d2f397f, 0x3d3b5935, 0x3c114f98, 0xbcc65a4a,
0xbd34016d, 0xbd05a335, 0x3d0d3551, 0x3c59b1c4, 0xbd235a40, 0xbd0a2bea,
0x3ccc2556, 0xbbfd6258, 0x3cd81886, 0x3d41dcc5, 0x3d37ecf7, 0x3cae1086,
0x3c73a234, 0x3d1c71a9, 0xbd3ca15d, 0x3d43e907, 0x3c94baae, 0xbd4b5aca,
0x3d09daff, 0x3c53a574, 0xbcf09773, 0x3b3b13b0, 0xbd27229d, 0x3d2593df,
0xbd2c7f62, 0xbd1eca76, 0x3c0888c8, 0x3b860140, 0xbcb67bb0, 0xbcf435aa,
0xbd2e8ce2, 0x3b89b750, 0xbccdf04a, 0xbcdbd9fd, 0xbc1118c0, 0xbd4c0207,
0x3ca91bf2, 0x3d2e3cd1, 0xbc160cac, 0x3c9bfa22, 0x3c031e94, 0xbbd129b0,
0x3d25f675, 0x3cda9792, 0x3d2aedb3, 0x3d412a1f, 0xbd0a4846, 0x3cdd4c76,
0xbcc4248a, 0x3c27b0a0, 0x3a615940, 0xbc66b220, 0xbd2e8bb8, 0x3d49ae11,
0x3d4332d9, 0xbcfc2100, 0xbd2ac383, 0x3cd667c6, 0x3d0c976d, 0x3c85c5fa,
0x3ba20c28, 0x3cf6ef96, 0x3c4b5c68, 0xb9b6ba80, 0xbcbafbf7, 0x3b0a1ee0,
0x3cee6332, 0xbc404a0c, 0xbc0f05f8, 0x3d1b3bcb, 0x3d4820bf, 0x3d2c90c9,
0x3d0d3843, 0x3b7f07d0, 0xbc6e3cd4, 0xbd017f98, 0xbbe09b70, 0xbc564360,
0x3d310a81, 0xbc68efa0, 0x3aaa1800, 0xbd4b4008, 0xbb92add0, 0x3d0a26d3,
0xbb03ccb0, 0xbb88e0d8, 0xbd0d3143, 0x3cd98022, 0xbcfba76a, 0xbcb0efaa,
0xbcb783ed, 0xbd2702ea, 0x3c23e634, 0xbd368ec2, 0x3bbb2b18, 0x3d43a38b,
0x3c07f7f4, 0x3c0f2cc0, 0xbca0230a, 0xbd451f0a, 0xbc8313cc, 0x3d4670e1,
0xbd406357, 0x3cbf59fe, 0xbca8e0ed, 0xbcb9bb3d, 0x3c817452, 0x3c900d2e,
0x3bd8d158, 0xbd2977c3, 0xbc3dd788, 0x3d12260f, 0x3cff63ea, 0xbcdeb8c3,
0xbced00da, 0x3ce76e82, 0xbcc8f677, 0xbc6648b4, 0xbd449ada, 0xbc9af66d,
0xbcbf552d, 0x3cdb28da, 0x3a1a6680, 0xbd1d79c0, 0xbcef2c2a, 0xbbf520b0,
0xbabc0a00, 0x3c8d280a, 0xbc989136, 0xbd0a489a, 0x3c368168, 0x3cc19ade,
0x3d2c7f03, 0xbd322e52, 0x3cb94f62, 0x3d0b907d, 0xbcb2682a, 0x3c09f140,
0x3bd4a1e8, 0x3d2550e5, 0xbced6c9d, 0x3d1c208f, 0x3d029b61, 0x3c80bfd6,
0x3c868faa, 0xbcd907aa, 0xbd31def2, 0x3d1d9951, 0x3cd8f40a, 0xbcf5fbd0,
0x3c9fcf6e, 0x3d32e6bf, 0xbc598380, 0xbd404c47, 0x3d030313, 0x3add26a0,
0xbc23c368, 0xbcbc4ff7, 0xbcfb37d7, 0xbd0f0d1a, 0x3d2cea83, 0xbcfc20f7,
0xbc3e6fa0, 0x3d28f981, 0xbc44ed28, 0xbc5752c0, 0x3bd6f0a8, 0x3d47bcb9,
0xba1b8b80, 0x3d00db71, 0x3b4f5150, 0x3c180534, 0x3ac24e00, 0x3d23a575,
0xbcb0afaa, 0x3c3df058, 0x3bdacd10, 0xbc2f4de8, 0xbcebcbad, 0xbc044674,
0x3d2a7241, 0xbd351873, 0xbcc99800, 0x3c644aa4, 0xbc93dba0, 0x3bd56c70,
0x3c22a874, 0x3c29316c, 0xbccde2fa, 0x3d04bf69, 0xbd2b2bd2, 0x3c24f6b4,
0x3d006067, 0xbd016525, 0xba8bdcc0, 0x3c7f18dc, 0x3cfa8832, 0xbc4c5414,
0xbcdd47ca, 0xbcfd17f3, 0x3d3dfcef, 0xbc986150, 0xbc7f99f8, 0x3d47203f,
0x3c1df868, 0xbcb19b1d, 0xbcec124d, 0xbc249dac, 0x3c8d9db2, 0xbcb76dc7,
0xbc90ab9a, 0x3d2d7e8b, 0xbd0ecbfb, 0x3b9ad180, 0x3d229639, 0xbd44e212,
0x3c86b72e, 0xbc825a46, 0x3cb2e2c2, 0x3ce0e25a, 0x3ccd776a, 0xbbec5d28,
0xbb71f950, 0x3c998342, 0xbc0e10a8, 0x3d38ba4b, 0x3d1626a9, 0x3cc00aa2,
0xbd3bfb45, 0x3c43b2d8, 0xbc601b14, 0x3bae2280, 0xbb8abdd0, 0x3d3ef73d,
0xbd47cbeb, 0x3d18422b, 0xbd079f7c, 0x3adfe460, 0x3d3962e7, 0xbd1ec823,
0x3ce4f25a, 0xbc419248, 0x3d0f8593, 0x3d39e519, 0x3d279cd7, 0x3ca695e2,
0xbce8d18a, 0x3c8369fe, 0x3c7b33dc, 0x3c92c912, 0xbd02a74e, 0xbce951ea,
0x3cddb652, 0xbd438bb0, 0x3c670944, 0x3d077419, 0x3aee6d40, 0xbcccddda,
0x3cd07792, 0x3aeb1140, 0x3d00ab6d, 0x3cdab052, 0xbc83a6c0, 0x3d378b65,
0x3d18ca3f, 0x3b5e9dd0, 0xbca3cd5d, 0x3cc7db5e, 0x3cece702, 0xbcdb7367,
0x3d2e6291, 0x3d23da33, 0x3c4d13ec, 0x3c9fef32, 0xbd25bf5b, 0xb9252b00,
0x3ad8d6a0, 0x3c337420, 0xbb6c2bd0, 0x3d031713, 0xbc4f236c, 0x3c51b244,
0xbc44ad4c, 0x3c9474f2, 0x3c063458, 0x3c13228c, 0xbbbba390, 0x3b4f2c60,
0xbc20288c, 0xbc1c6ec0, 0x3d2342e9, 0x3c6b03fc, 0x3b9fd890, 0xbbe72070,
0xbc351b0c, 0xbc4d3e14, 0x3cbe837a, 0xbb30cb30, 0x3ce17856, 0xbb8c5a58,
0x3c074738, 0x3c382288, 0xbbcd2b28, 0x3c82507a, 0x3b2a0b60, 0x3d44130f,
0x3c10d9e0, 0xbabce6a0, 0xbadf8600, 0xbb739c80, 0xbc600f80, 0x3c82c276,
0xbd2226f5, 0x3d3ff37f, 0x3d4426ad, 0x3d22f737, 0xbc591d08, 0xbd24f663,
0x3bdd6390, 0xbd386275, 0xbc866100, 0x3c695014, 0x3c814c0a, 0x3d3f3311,
0xbc6bc1e0, 0x3d32ca43, 0x3cb7d7ae, 0xbba1e9d0, 0xbd4bd5fa, 0x3ba978d8,
0xbca2af5a, 0xbb2bc200, 0x3cb7bb0a, 0x3d0ba59f, 0x3d169ef1, 0x3b0a2650,
0x3d1fc229, 0x3cfa4662, 0x3c9529de, 0x3cd13772, 0x3cd6f05e, 0xbca93473,
0xbcafe123, 0xbd02a278, 0x3c3c0cd4, 0x3c894c4a, 0x3c41bd00, 0x3c5ca0a4,
0x3d1b717b, 0xbcd16950, 0xbc7de328, 0xbd3cf5ef, 0xba650800, 0xbd3e2408,
0xbb54cbb0, 0xba0f8cc0, 0x3cd82822, 0x3d3d792f, 0xbc9516b4, 0x3b1d1d50,
0x3d368979, 0x3c5e6dec, 0xbd3cf378, 0x3d3a8635, 0xbd4662e0, 0x3ca3eb6a,
0x3bd87628, 0xbd4aa05b, 0x3cc1540a, 0x3d11f57f, 0x3c6448c4, 0x3a90a600,
0xbd25e66b, 0xbd3333bf, 0xbc35e6e8, 0xbca0f943, 0x3b20bee0, 0xbd1881d8,
0x398e8580, 0xbd1f24b5, 0xbc42176c, 0x3d46a8a7, 0x3d17a7fd, 0x3ca6c69a,
0xbc153748, 0x3bb1acf0, 0xbd2a041b, 0x3caf685e, 0x3ac27160, 0x3c1830a0,
0xbc5498b8, 0x3c462634, 0x3d08fa25, 0xbd1eb5a2, 0xbc1f14d4, 0xbceeee57,
0x3ceae45a, 0x3c4c2028, 0xbca0930a, 0xbcad99ed, 0xbd01bb5a, 0xbc541b68,
0x3d47d671, 0xbc8964d0, 0xbc3b78f8, 0x3cbf18fa, 0x3d2a8f6d, 0x3bad6668,
0xbcc31657, 0xbcdf69d7, 0xbc216f8c, 0xbc76a434, 0x3d06df89, 0xbd2d9123,
0x3c8ffc22, 0x3cd98b1a, 0xbb93ef10, 0x3d4a7163, 0x3d0d6471, 0x3c02b808,
0x3b9e7940, 0xbc331560, 0x3cfa9c82, 0x3cd98a2a, 0x3ad2af00, 0x3d16e8bf,
0x3d04c911, 0xbcb0a740, 0x3d0eae19, 0x3d42eb55, 0x3c9cf206, 0x3d3a18c9,
0xbb4e7e50, 0xb9f4ad00, 0xbcf3437a, 0xbd2d651f, 0x3c2297ac, 0xbd3bb2c8,
0xbc5efd4c, 0xbc949774, 0x3cc4f6a2, 0xbd0a815a, 0x3cee9902, 0xbcbb15a0,
0x3c82e192, 0xbd1b7e8e, 0xbcf11be0, 0x3bbbe510, 0xbce9d433, 0xbd13d5bb,
0xbc6815ec, 0x3c89ceb2, 0x3cee4ede, 0x3c6b3384, 0xbd112576, 0xbcda1fa3,
0xbc8a3dca, 0x3c51d724, 0x3cf2124a, 0xbbe8eeb0, 0xbcdb7f5d, 0xbd2cc46e,
0x3d3909f3, 0x3c75b3fc, 0x3d1b4d4f, 0x3c8dcb66, 0xbbf7bad0, 0x3c82e00a,
0xbca273e7, 0xba8bc8a0, 0xbc7053f8, 0x3c9c67ae, 0xbb958c40, 0x3c20db00,
0x3c1b5a28, 0xbc9967d0, 0x3ca42a9e, 0xbce59ef3, 0xbd31c562, 0xbd01404c,
0x3d06f385, 0xbc8bcd74, 0xbb05c3b0, 0x3cbbf1f6, 0xbcf06560, 0x3d13e9e9,
0x3c083118, 0xbd183ebb, 0x3cda6dd6, 0xbd29999b, 0xbabd2ea0, 0xbce821b0,
0x3c419c60, 0xbd2b8af8, 0x3d1f3849, 0xbca0c1ca, 0x3c5a8f1c, 0x3d1ce21b,
0xbcaf98e0, 0x3d3c0893, 0x3d0a853f, 0x3cf646aa, 0x39affb00, 0xbd389690,
0xbd4b39d3, 0xbb503720, 0xbbb53590, 0xbbd704b0, 0xbc37d514, 0xbd0719dd,
0xbae6c6a0, 0xbcdbf147, 0xbc20dd08, 0xbd4c05fd, 0xbc81f7f0, 0x3bf4ba30,
0x3cd79452, 0x3d452637, 0xbc461978, 0x3beec000, 0x3d338637, 0x3c9bf462,
0xbd32ee0f, 0x3c22b3a0, 0x3d29b317, 0x3d3c7313, 0xbc376740, 0x3c8c37a2,
0x3d0ca591, 0x3b46b2a0, 0xbc4f2848, 0x3c721f2c, 0x3c8cd96e, 0xba25f740,
0xbbd8b2e8, 0xbb5a3650, 0xbc22d698, 0x3cd440fe, 0x3d1f4db9, 0x3d4323b9,
0x39689e00, 0xbd07b34e, 0xbccfa89a, 0xbb9e7b28, 0xbd494eaa, 0xbd385b07,
0xbbb5fa98, 0xbcbaf4d7, 0x3cc7dc46, 0xbcb7a5dd, 0xbb0a16b0, 0xbb51f160,
0xbd3c0b1a, 0xbc1142ec, 0xbd3f8dd5, 0xba843260, 0x3ca5cc22, 0xbd26a015,
0xbce361f0, 0xbc10a48c, 0x3c9f7b6e, 0x3c9287de, 0xbc81e2a4, 0xbd37b89b,
0x3d480471, 0xbd14a0eb, 0x3d234b61, 0xbc89835c, 0xbcbccc1d, 0xbd291efa,
0xbcf1d68d, 0xbbd96c40, 0xbcb922aa, 0x3c80bdfe, 0x3c7c8024, 0xbd105d62,
0x3d244d31, 0x3cbbe22a, 0xbcb32eb7, 0xbcd1cb73, 0x3d0e8799, 0xbb920a68,
0xbd2e2b60, 0x3cbdb9e2, 0xbcfa0777, 0xbd06be54, 0xbd24d3bb, 0x3d3683c3,
0x3ceffe3a, 0x3ccc9cca, 0x3c3e2b00, 0x3ca3238e, 0xbd37e2b0, 0x3d11c961,
0xbd4ae8a3, 0xbd486c65, 0xbb8237e8, 0x3d30f539, 0x3d14c629, 0xbd4193eb,
0x3d26de35, 0xbd25110b, 0xbd1cc35a, 0x3c810422, 0x3d3cb60d, 0x3d48e591,
0xbd044924, 0x39545e00, 0x3d09ce5f, 0x3cef5336, 0xbb5d5b50, 0xbd037c0c,
0xbcb4b237, 0x3d4a11b9, 0xbcf4825a, 0xbd168eca, 0xbd2f5fad, 0xbba23d80,
0x3ceb122e, 0x3b070ed0, 0x3c4e9b4c, 0x3c580244, 0xbd461647, 0xbbc52830,
0x3d2c6e15, 0xbc8c15cc, 0xbd0d8fd2, 0x3be4a1f0, 0xbc210068, 0x3ca9a456,
0x3cc74eba, 0xbd1a8588, 0xbc784c48, 0x3c8cfe52, 0x3d2dafa9, 0xbc666754,
0x3cbad202, 0xbbdb5b28, 0x3c49e0f8, 0xbd3035cf, 0x3cc6bd0e, 0x3d17fb77,
0x3b60c620, 0xbd34bfc3, 0x3cdd6aa6, 0xbd1da1de, 0xbd1d27b2, 0x3ba27e28,
0x3cde5c2a, 0xbd4c18b2, 0xbcbcc0fd, 0x3b6fb6e0, 0xbc227260, 0x3cc3e3e2,
0x3cda3926, 0x3c0f5880, 0x3d452a2f, 0xbcca98d0, 0xbd462d60, 0xbd0ba370,
0x3cd64fb2, 0xbd4a8e37, 0xbd05dfee, 0xbc1a9bd4, 0xbd268438, 0xbcf40b2a,
0xbd4a88bd, 0x3c603f74, 0xbba3e3f0, 0xbbd827a8, 0x3c8485b2, 0xbd3ee2c2,
0xbd466335, 0x3c846b4a, 0xbd3703c0, 0xbd0ffab3, 0xbca240fd, 0x3ceacad2,
0x3c4fbdb4, 0x3c0c45c8, 0x3d05a8d5, 0xbc5c3f28, 0xbd3ea837, 0xbd129b55,
0x3cb3689a, 0x3d26abd1, 0x3d0cf0e3, 0xbcbe0683, 0x3ce1872a, 0xbc4cca28,
0xbc85cbca, 0xbb3e8460, 0xbd0e79e3, 0x3c89b682, 0x3d382369, 0xbd0e41a0,
0x3c99454a, 0xbad781c0, 0xbc811614, 0xbd37d59f, 0xbcc4fdb3, 0x3b3baa60,
0x3d470b9b, 0xbcb15893, 0xbd2e08ef, 0xbcab4813, 0xbbdd75e8, 0x3d092ff3,
0x3d091ac5, 0xbcbe0f03, 0x3d009871, 0xbd1deac2, 0x3d47da6f, 0xbc7323f8,
0x3ce8096e, 0xbcc2410d, 0xbcffbc97, 0xbbbd9830, 0x3d459729, 0xbc136060,
0xbd0330e4, 0xbce041ed, 0x3c98ac5a, 0xbd10a4b2, 0xbd3e3037, 0xbd206468,
0x3d34e981, 0x3c389ea0, 0xbd242522, 0xbcbe9850, 0xbcd60ee7, 0xbcfb070d,
0xbb028f80, 0xbbea97e8, 0xbbaa1f28, 0x3d18b097, 0xba530cc0, 0x3d1a05c9,
0xbd17b3ba, 0x3c81adf2, 0x3d21a6a3, 0xbd302f33, 0xbd28c162, 0xbc43e194,
0x3c277c58, 0xbcd14130, 0xbb89d3a8, 0xbc3f92d8, 0x3d3b5e07, 0x3bdde368,
0xbcec6d4d, 0xbbbdede8, 0xbabb21c0, 0x3cddbbd6, 0xbd25cc2e, 0xbc6c92c8,
0xbccb1030, 0xbcdc1163, 0x3cfb8c12, 0x3d3f2e85, 0xbd3707b8, 0x3c282b20,
0x3b7145d0, 0xbd115813, 0xbbc6f800, 0xbd103956, 0x3ba25528, 0xbd2697ab,
0x3cfb773a, 0x3d38ad2f, 0x3bf5df80, 0x3c631b0c, 0x3d46ce7d, 0xbc743eec,
0xbc589f8c, 0xbd3a9070, 0xbd2e9e9b, 0xbccaef27, 0xbcf61793, 0xbcfd47a0,
0xbd048d2d, 0x3c33edc8, 0xbca6d920, 0x3d16f5a3, 0x3bd1a650, 0xbc916a34,
0x3ca1a002, 0x3b86b698, 0x3cc09626, 0x3d382fdf, 0x3cd125ba, 0xbcc69920,
0x3bd58e18, 0xbb379360, 0x3ccf4b92, 0x3d3c2fd1, 0x3be5cd10, 0x3926e600,
0x3d1a42b1, 0x3c4412cc, 0xbc251cac, 0xbcba31ea, 0x3c98b6b2, 0xbbb536b0,
0x3c8b7ca6, 0x3cb01d82, 0x3cac849a, 0x3c575ec4, 0xbc6ff768, 0xbd43457b,
0x3bc20340, 0xbcfe39ba, 0xbd2dcad5, 0x3d1c6923, 0x3d20d2a9, 0x3ccd6d42,
0x3d140969, 0xbd47ea7f, 0xbc9d1967, 0xbad11440, 0x3d3fd6b3, 0x3d0406db,
0xbcd0d390, 0x3d0117c9, 0xbb4abfd0, 0x3ca4b0a2, 0x3d3c14df, 0xbcc52653,
0x3be00400, 0xbc633560, 0x3b9ba198, 0xbca1ecad, 0xbd148732, 0xbcf05240,
0x3d3c4535, 0xbd2df2a7, 0x3bdc7bf0, 0x3d1a9d01, 0x3b04afd0, 0xbcbf7093,
0x3d10cf11, 0xbd20fecb, 0x3c5a2294, 0x3cfaa8c2, 0x3d4544a1, 0xbb778fb0,
0x3bd6c468, 0x3c533e64, 0xbb03f380, 0xba8cc760, 0xbd1b780a, 0xbc33f834,
0x3ca93136, 0xbcee5fa7, 0x3d4824bd, 0xbc8c2364, 0xbc96c32c, 0x3b5274b0,
0xbd40acb2, 0xbb0aa3b0, 0x3c5e3a04, 0x3cb05e5a, 0xbbf5a490, 0xbd469270,
0xbcb1613d, 0x3c4d4104, 0x3d29fd19, 0xbd3ca957, 0xbd367eca, 0xbcf4b8b0,
0xbd4899d8, 0x3c4ad04c, 0x3cd504aa, 0xbd292aa0, 0xbc93fb1a, 0xb8927000,
0xbcb399bd, 0xbcb1882d, 0x3cdf1e82, 0xbd154a58, 0xbba65590, 0x3d223bf5,
0xba21a2c0, 0x3c9cadfe, 0xbccd19c3, 0xbd063e1e, 0x3d2fa8af, 0xbcaad777,
0xbd493cf5, 0xba19c780, 0x3cdf4afe, 0x3cf71c46, 0xbd0e8150, 0x3d2b94df,
0x3c9890e6, 0xbc875256, 0xbb92a798, 0xb8d05400, 0x3b83e610, 0xbcf30377,
0xbc970b7a, 0x3cb85f32, 0x3d0aeb31, 0xbd100dc5, 0xbd2ec743, 0xba81f1a0,
0xbcd2f36a, 0x3c8b8912, 0x3cd213ce, 0xbcd8505a, 0x3caf84ca, 0xbd1a1f43,
0xbd22fc05, 0xbc38fb40, 0x3c29ffa8, 0x3d21e4f9, 0x3d336049, 0xbc29fb14,
0x3d4c8f65, 0x3d0156b9, 0xbc9c1a63, 0x3bf1d810, 0x3d2f3379, 0xbcc6024d,
0xbd2b784e, 0x3cc61f72, 0x3bcad3e8, 0x3d1d16c7, 0x3c493368, 0x3d4a3853,
0x3d2f9a0f, 0xbd18cc55, 0x3ca27c92, 0xbc0e0578, 0x3d2f9f6b, 0x3d25c15f,
0xbccba443, 0x3d2861f9, 0x3cdd1c26, 0xb9bba980, 0x3c215ce8, 0xbc6fe358,
0xbd436fd3, 0xbc5fa958, 0xbcfd9ef3, 0xbc2e3d88, 0x3c9630be, 0xbd019f08,
0x3c552b0c, 0x3ccead72, 0x3d3161b5, 0xbd349167, 0x3cfb291a, 0x3baf3a70,
0xbd30eaef, 0x3d36d16d, 0xbbff9db0, 0xbd05cfe5, 0xbd46d333, 0x3a5d36c0,
0xbd2f322b, 0x3c6ea574, 0x3cc23a2a, 0xbd087a4d, 0x3c9e21b6, 0x3c8b4572,
0xbcfb10fd, 0x3d256731, 0x3ca1cd0e, 0xbd4060a8, 0x3c9c80e2, 0x3d0bb7b1,
0x3caec47a, 0xbca2cfaa, 0xbcd33083, 0xbbd930f0, 0x3d2a8e01, 0x3a034b80,
0x3c964966, 0x3d2e454f, 0xbd1daa35, 0x3d42e051, 0x3cb0dc8e, 0xbd03e9f0,
0x3ce23c82, 0x3d2b9c51, 0xbad26360, 0x3cf6b6c2, 0x3c5ccecc, 0x3d0d4d23,
0xbd2023dd, 0xbd080fdd, 0x3d27cddf, 0x3d4c3a39, 0x3c8303fa, 0x3cce2002,
0xbd420ceb, 0x3ce895e2, 0x3d1dd9a3, 0xbc269ba0, 0xbcce26cd, 0x3ce6a7ea,
0x3cbdf30e, 0xbd48fe87, 0x3c5c97a4, 0x3c961dfa, 0x3c323fb4, 0x3d1aa5ef,
0xbb308e50, 0x3d0699af, 0x3cbf1eb2, 0xbd0a3460, 0x3ba9a618, 0xbcdfe007,
0xbc13b634, 0xbc5bbbe0, 0x3d2a4e3f, 0xbcd5f22a, 0x3c76f9f4, 0xbc9b65cd,
0x3cb59b36, 0xbcaa9fd0, 0x3ccb71da, 0xbd38c728, 0x3cc6f0ca, 0xbd1d5c6a,
0x3d320255, 0xbd3a9ed5, 0x3b3d4930, 0xbd3aaa4d, 0x3c9e2a82, 0x3be26210,
0x3b52f560, 0x3cbaf15a, 0xbc9efa8a, 0xbd0726e6, 0xbd2c5ebd, 0xbd0af8a2,
0x3d26a0d7, 0x3cc926b6,
};
// 5
uint32_t bias_vals[] = {
0x3bded4d8, 0x3c9d39d2, 0x3ca89fd2, 0xbc5af538, 0xbcb69fcd,
};
// 3,1,1,5
uint32_t output_exp_vals[] = {
0x3c0f5041, 0xbd5feb0d, 0xbe2ac302, 0x3e4629df, 0xbf31fe38,
0x3e5c01b4, 0x3e7c96f6, 0xbce63e5a, 0x3e379fba, 0xbf3027ad,
0xbdb021b6, 0xbe97d08d, 0xbef57ffa, 0xbdfbe7fc, 0xbf1bf24c,
};
uint32_t clip_value = 0x3e4ccccd; // 0.2f as an IEEE-754 single-precision bit pattern
// 3,1,1,5
uint32_t output_relu_exp_vals[] = {
0x3c0f5041, 0x0, 0x0, 0x3e4629df, 0x0, 0x3e4ccccd, 0x3e4ccccd, 0x0,
0x3e379fba, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, (void *)&clip_value);
}
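// Convolution over the medium input set (3,10,8,5) using the non-zero stride
// configuration; per the function name this case exercises SAME padding.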
void test_same_padding_non_zero_strides_medium() {
input_set *set = &medium_input;
strides_input_set *strides = &medium_non0_strides;
// 3,10,8,5
uint32_t input_vals[] = {
0x3dedbd80, 0x3f3c7b79, 0x3e131b74, 0x3f07e9cb, 0x3e936dc0, 0x3f2a452c,
0x3f63ab13, 0x3f64a605, 0x3f17672a, 0x3ed1273a, 0x3ebebdd8, 0x3f0c947e,
0x3f5af4f0, 0x3e672280, 0x3f222b95, 0x3e84c4d4, 0x3f481888, 0x3eb31760,
0x3f282381, 0x3d832688, 0x3f50d901, 0x3e87184e, 0x3f130b99, 0x3ef625d8,
0x3f039d45, 0x3ef74f90, 0x3f47739d, 0x3f0f95c4, 0x3eb4895e, 0x3f3d0eef,
0x3e5b5400, 0x3f051d17, 0x3edcaddc, 0x3eff1d44, 0x3f1009db, 0x3e4da22c,
0x3f550f4d, 0x3e286098, 0x3c7b4a80, 0x3ee87202, 0x3f788b9a, 0x3f7a6028,
0x3f697980, 0x3e1754a8, 0x3f524aa1, 0x3f5341de, 0x3df96408, 0x3f3efd3b,
0x3f10f16c, 0x3f37ada8, 0x3e10ddac, 0x3f798359, 0x3da84670, 0x3f7b9cca,
0x3d6356b0, 0x3f000faa, 0x3f61b5d8, 0x3de66248, 0x3e5086a8, 0x3f4b5467,
0x3efa7626, 0x3dc718f0, 0x3f79b8e4, 0x3f520efc, 0x3dd857c8, 0x3f3d0355,
0x3f2585a9, 0x3f146def, 0x3f0bb6e8, 0x3f47e6ab, 0x3d92fb78, 0x3dbe5a80,
0x3ec52f64, 0x3e91dc5c, 0x3f6bc833, 0x3f01e701, 0x3f29857c, 0x3f56c01c,
0x3f0023e7, 0x3f0bcbe5, 0x3f2aa5fa, 0x3f13cb3e, 0x3f24c58f, 0x3f3d5dcf,
0x3ea97abc, 0x3f4123bc, 0x3cb9e960, 0x3f2df2c2, 0x3f0f3a6d, 0x3e778748,
0x3f0d6252, 0x3f2e7767, 0x3f658c24, 0x3f714eda, 0x3e8e536c, 0x3e811bb8,
0x3eb9ae8e, 0x3eec050e, 0x3ecf8cd2, 0x3ed0cf9e, 0x3ca3c7c0, 0x3f7f9666,
0x3f79f7d1, 0x3e2a18a0, 0x3e95ddf8, 0x3f452f8b, 0x3f7cedd5, 0x3e185a7c,
0x3f73aa6c, 0x3f7f1133, 0x3f74dbfb, 0x3f2c2fd8, 0x3e4872ac, 0x3eb6e2e0,
0x3e91935a, 0x3eb2b562, 0x3f64e525, 0x3f15b029, 0x3f513750, 0x3f3524e8,
0x3d59cbf0, 0x3f2e3b2b, 0x3ee0eb9e, 0x3f546dab, 0x3d14d5d0, 0x3f257374,
0x3f7919bb, 0x3f490b1c, 0x3f16ab5c, 0x3f40e084, 0x3e19099c, 0x3ee85d48,
0x3eba62fc, 0x3db78e00, 0x3f6ceab8, 0x3e317a24, 0x3f1fa2a0, 0x3f1420b6,
0x3c158fc0, 0x3e10f6c0, 0x3f22e418, 0x3ed5b692, 0x3ecace68, 0x3f3cc42c,
0x3ebeaef0, 0x3ee7b088, 0x3efb85e2, 0x3f601876, 0x3f1f4b7d, 0x3f1e81ea,
0x3eee9634, 0x3eb2c5fe, 0x3eef3554, 0x3f4a7a82, 0x3caf3280, 0x3f72cf34,
0x3e9aafd0, 0x3f01f6f8, 0x3d72e160, 0x3e3c0c40, 0x3ed318d6, 0x3f4a0b90,
0x3f5dc59f, 0x3edcbaf8, 0x3f2c8435, 0x3f4dea36, 0x3f36eda3, 0x3f7c7056,
0x3f6e9798, 0x3f7887d5, 0x3eb053aa, 0x3e8efdee, 0x3f7a7a39, 0x3f433fac,
0x3eb38584, 0x3f5f3ece, 0x3d731830, 0x3c367c80, 0x3f6a5f53, 0x3efdab28,
0x3f270c9d, 0x3f3e9627, 0x3e5eb05c, 0x3f647ede, 0x3c8dfda0, 0x3f720f81,
0x3d241a50, 0x3ec868f8, 0x3ec12f9a, 0x3ca47700, 0x3f2ca4ce, 0x3ef4abf4,
0x3ecb2688, 0x3e23561c, 0x3f6f5198, 0x3ea55200, 0x3d8ee818, 0x3f4a0a40,
0x3e4eb34c, 0x3f6c9f2b, 0x3f37ca0f, 0x3f6b2da8, 0x3f39cf91, 0x3f2b78e3,
0x3eb1faa8, 0x3f6d0da1, 0x3f683a3a, 0x3eeaf18c, 0x3e9a626a, 0x3e9ac5da,
0x3f1842a0, 0x3db66f18, 0x3f030844, 0x3f0081f5, 0x3eef6d7e, 0x3f3b02fd,
0x3d0fe810, 0x3f1b6ccb, 0x3f146d3e, 0x3f3b640f, 0x3e17cbcc, 0x3e51480c,
0x3f6e57e0, 0x3f4f759d, 0x3c89ac20, 0x3f455ae5, 0x3f23520b, 0x3f4254d0,
0x3f4762d4, 0x3e646460, 0x3ce018a0, 0x3e7ff60c, 0x3e7dbe08, 0x3f490e08,
0x3eab9ab0, 0x3f71279e, 0x3ec308dc, 0x3e79b7e8, 0x3f0533ca, 0x3e9092d0,
0x3f34ddf6, 0x3ebf1bfc, 0x3f287aca, 0x3f484329, 0x3ef93cb6, 0x3f086c69,
0x3f177abb, 0x3ee61b4e, 0x3e882744, 0x3f5d5d3d, 0x3d032af0, 0x3e86e918,
0x3e3787f8, 0x3e99895c, 0x3f1bf283, 0x3f7465f2, 0x3f3a3760, 0x3c8394c0,
0x3ed7777a, 0x3d429930, 0x3f519e3a, 0x3e119e9c, 0x3f1255b6, 0x3e56ab2c,
0x3f76628c, 0x3f75ffc9, 0x3ee93664, 0x3eb53df8, 0x3f722cfd, 0x3f671072,
0x3f325175, 0x3f6b4b00, 0x3e56199c, 0x3ed8c590, 0x3f284d2c, 0x3f2b1b87,
0x3e6548d8, 0x3f1813fd, 0x3f253675, 0x3f3ea74a, 0x3ebdaffc, 0x3f623436,
0x3f7ca4f7, 0x3ef80a6e, 0x3f2fd989, 0x3e97173c, 0x3f429e6f, 0x3f6a5080,
0x3efc7faa, 0x3f68eabf, 0x3f32bd55, 0x3ead4eae, 0x3f48fb09, 0x3e6f1bd0,
0x3c9c7b80, 0x3f2c641e, 0x3f2c62c7, 0x3dfbb258, 0x3eed3488, 0x3e4145d4,
0x3ef7b352, 0x3e261aec, 0x3f490dac, 0x3d5812f0, 0x3edf7d5c, 0x3f145c08,
0x3f0a4d41, 0x3f423434, 0x3e85940e, 0x3ecf4dbe, 0x3f2bf841, 0x3f2441e2,
0x3f2ab4d1, 0x3f6280f8, 0x3e2bf75c, 0x3c89cda0, 0x3f6eb70d, 0x3ec03ac8,
0x3cfe2700, 0x3ca7f1a0, 0x3d6cdd50, 0x3e617ee0, 0x3efb8ad2, 0x3f151055,
0x3df1f228, 0x3ebbda10, 0x3e5cd078, 0x3e6a4abc, 0x3f146456, 0x3ed83d5a,
0x3f572cf9, 0x3e803aaa, 0x3ec6558a, 0x3cafa8c0, 0x3f6ca7e8, 0x3f3e72ed,
0x3c539440, 0x3ed5dbd2, 0x3f190013, 0x3f54dc87, 0x3cba8620, 0x3f19e297,
0x3edbbc60, 0x3f3b6eab, 0x3f24e529, 0x3eb322e8, 0x3db05d18, 0x3b1da600,
0x3f5cb546, 0x3e4589b8, 0x3ee4f080, 0x3ebb4742, 0x3f2d2824, 0x3f17f560,
0x3e990320, 0x3f2a0a5b, 0x3e8dc70c, 0x3dba2c20, 0x3f640dfb, 0x3f0c54b2,
0x3f68b832, 0x3f072818, 0x3e011bec, 0x3f04d165, 0x3d15b270, 0x3f02ccb0,
0x3f26d422, 0x3f3403f1, 0x3f30c556, 0x3ef45046, 0x3de2eec8, 0x3e0befdc,
0x3db9c658, 0x3f25c969, 0x3f1a076a, 0x3f07da9d, 0x3eb46cd0, 0x3f7a1c27,
0x3e067c54, 0x3f73b91c, 0x3e9d248e, 0x3ee76372, 0x3f7a90dd, 0x3ec7cde8,
0x3ebcccea, 0x3e471868, 0x3e580598, 0x3eaea5d6, 0x3e17722c, 0x3f7da451,
0x3f46e5df, 0x3f1d91c9, 0x3ed3e17c, 0x3f6374ab, 0x3f09a825, 0x3edd9b1c,
0x3f2f6298, 0x3df09568, 0x3f3d1043, 0x3e1cc804, 0x3f78f9d1, 0x3f2ba9e8,
0x3f5b78d7, 0x3f334558, 0x3f316de8, 0x3f6ef91e, 0x3f31afd0, 0x3f0b2158,
0x3e8e10b0, 0x3edf2d64, 0x3dd6e110, 0x3f225347, 0x3f6d9959, 0x3ed320c6,
0x3e70d344, 0x3e56fdd0, 0x3f5d77a8, 0x3ea6728a, 0x3ee569e4, 0x3cb04d00,
0x3f3d3403, 0x3f432b18, 0x3cd6f2c0, 0x3f2d2f41, 0x3f3d8e67, 0x3e44f684,
0x3f25c4d7, 0x3f27bff1, 0x3f63e0cc, 0x3f63d261, 0x3f5708ab, 0x3ea6ad68,
0x3e92e8a0, 0x3ec7b2a8, 0x3e9e2806, 0x3e85dab2, 0x3e841af4, 0x3e4de57c,
0x3f533c07, 0x3e8cf034, 0x3eb08c02, 0x3ec0b52c, 0x3f4fccf0, 0x3f450456,
0x3f1611b5, 0x3eb682c4, 0x3f3d1629, 0x3f08e6c0, 0x3d88b960, 0x3ee858de,
0x3f7a1db8, 0x3e694c18, 0x3eea23e4, 0x3f5d92f8, 0x3f6247d8, 0x3ea96e62,
0x3f22f878, 0x3f7ca7c3, 0x3ef5a19e, 0x3efb917e, 0x3ece38ce, 0x3f6ffcce,
0x3f6f8161, 0x3f62cc20, 0x3ef849fe, 0x3f4d7328, 0x3f6c6233, 0x3dbb78f8,
0x3f63b421, 0x3d0ffff0, 0x3e21b148, 0x3e070a9c, 0x3d255c60, 0x3c81a9a0,
0x3f271171, 0x3efec452, 0x3f247bff, 0x3e816868, 0x3dab3408, 0x3f0071de,
0x3de909b0, 0x3f53b218, 0x3de4f200, 0x3ee2dc06, 0x3f1e3273, 0x3d1dbd50,
0x3d3bc1f0, 0x3e72dc20, 0x3f2921a7, 0x3f4e41b9, 0x3ec58d5e, 0x3f51ce94,
0x3f565b18, 0x3ed741f0, 0x3f367441, 0x3f0e265c, 0x3e40fa48, 0x3e2b5118,
0x3c98b7c0, 0x3f003e6a, 0x3f6d277f, 0x3e0dcc8c, 0x3f75654b, 0x3f45a03e,
0x3eec46a4, 0x3ca7f820, 0x3e937bc0, 0x3f27efc4, 0x3e5b3990, 0x3e7d651c,
0x3eb65652, 0x3e9fb5a8, 0x3f058f3e, 0x3f3a1be6, 0x3f7396ab, 0x3f21af26,
0x3e9628b0, 0x3f15e4f5, 0x3f5a691b, 0x3f6a8a45, 0x3f30558c, 0x3f24d9e0,
0x3f1b0465, 0x3ea9a774, 0x3efef674, 0x3f4ac8cf, 0x3f31965b, 0x3f292f47,
0x3f4d1662, 0x3f7fd9a8, 0x3dc89ce0, 0x3f31bf2c, 0x3dbf2fb8, 0x3f11056d,
0x3f706665, 0x3ec16a32, 0x3f585ffa, 0x3efb8c32, 0x3eafa1cc, 0x3f499693,
0x3f1d98fb, 0x3e3c0d90, 0x3f54e9b3, 0x3f084daf, 0x3f41cc22, 0x3dcb2790,
0x3cceba40, 0x3f7f2b2a, 0x3f2e0799, 0x3f32acd5, 0x3f408035, 0x3dca84b0,
0x3ef2750a, 0x3f418550, 0x3ed317c8, 0x3cbbbac0, 0x3e669788, 0x3e123e94,
0x3e5a33c0, 0x3f0cb858, 0x3f4fdebf, 0x3e8ae73e, 0x3f3ad384, 0x3f5fac72,
0x3f68225e, 0x3f299d5b, 0x3f383eb2, 0x3d2e2fc0, 0x3e86ebfe, 0x3ed4dd82,
0x3eb7489a, 0x3e0d7c48, 0x3f7216ff, 0x3d8de648, 0x3f27f280, 0x3f2986a1,
0x3e09a7f0, 0x3f4eabbe, 0x3e915532, 0x3ead61b0, 0x3f6c69ce, 0x3f448c4e,
0x3f39a72a, 0x3d6adc00, 0x3f15016d, 0x3f5e2975, 0x3e3a381c, 0x3f117bf9,
0x3f23d70f, 0x3f4a572f, 0x3f4da388, 0x3f18020b, 0x3f5d0c0c, 0x3f089df5,
0x3f4f9b78, 0x3e55f310, 0x3df335f8, 0x3e989620, 0x3ef388a0, 0x3f3ea204,
0x3f24c82e, 0x3f30d6ca, 0x3da9e5c8, 0x3e257ba8, 0x3f3ce840, 0x3f600e76,
0x3f4ffbb9, 0x3e64f914, 0x3f4379fd, 0x3ec28b48, 0x3f169355, 0x3da2a9d0,
0x3f5f400d, 0x3f14f664, 0x3f748e3c, 0x3f54bead, 0x3f2b8a26, 0x3e800e16,
0x3ebfc00e, 0x3f59eeb2, 0x3e427770, 0x3e7392a4, 0x3dcf7eb0, 0x3f16ad97,
0x3f792cb7, 0x3f59f716, 0x3f513bd6, 0x3f1c096a, 0x3f2d24f1, 0x3dc40ea0,
0x3ef7821c, 0x3f731fc0, 0x3e41f49c, 0x3e4c64a0, 0x3e8bfe88, 0x3d529000,
0x3da02110, 0x3ef52a00, 0x3eb6444c, 0x3dadb828, 0x3ebda364, 0x3f2fe0d1,
0x3e3b0660, 0x3e8bbeda, 0x3f0b56bd, 0x3f7121e5, 0x3f5671aa, 0x3f1b0b70,
0x3f714a25, 0x3f61cdce, 0x3ea6916c, 0x3f745fb4, 0x3ed63536, 0x3f1fa671,
0x3e221058, 0x3ed3a964, 0x3f60ac46, 0x3f10e377, 0x3dfb3328, 0x3f217fcf,
0x3e999712, 0x3f192c83, 0x3f3b2a9e, 0x3f5a65c4, 0x3efae0a6, 0x3e699c84,
0x3f077ba8, 0x3e365360, 0x3ed759c4, 0x3d0d34c0, 0x3f162cf4, 0x3f1a5f7b,
0x3ea8b7b8, 0x3f08cf56, 0x3ef5f27a, 0x3f4da628, 0x3e4ec16c, 0x3e2a9660,
0x3f108fbc, 0x3ee1f14e, 0x3f0f22a8, 0x3ecdd798, 0x3f6a96ff, 0x3ee219f4,
0x3e83d640, 0x3ef035e6, 0x3efc0d4e, 0x3ea456fc, 0x3f792d31, 0x3e407ae4,
0x3f076787, 0x3e3c0354, 0x3f30345e, 0x3f37721e, 0x3f4bac1e, 0x3e000e68,
0x3ef4a632, 0x3f1d8a79, 0x3f1b8efb, 0x3ee5baa8, 0x3e723a74, 0x3eddfe00,
0x3bdbda00, 0x3ea2edc2, 0x3ecd4328, 0x3f0667b2, 0x3ea660d6, 0x3e1485b8,
0x3f1d75db, 0x3eb8d374, 0x3dcd85c8, 0x3eb15f44, 0x3ebe3540, 0x3e0b351c,
0x3edf448a, 0x3f3f6611, 0x3b057d00, 0x3deefdf0, 0x3ea77d9a, 0x3eb96394,
0x3f13827e, 0x3f79fbc1, 0x3f43df3d, 0x3f40e6d9, 0x3e4d4560, 0x3f01590a,
0x3f3f1770, 0x3f5075ee, 0x3c016940, 0x3f369ccb, 0x3eef9256, 0x3f5c8a2b,
0x3eff87c6, 0x3ef98f3e, 0x3efc372a, 0x3f6c1bbe, 0x3d985528, 0x3f0fac56,
0x3ea1a5c8, 0x3d588fc0, 0x3f74d0e1, 0x3c2bfb80, 0x3f21bb2f, 0x3f4164fa,
0x3e7ee518, 0x3f3e7fc1, 0x3f5f306b, 0x3f2a9288, 0x3e402ec4, 0x3ecbc582,
0x3f3f92e6, 0x3ed30bb2, 0x3ece7c06, 0x3f1627af, 0x3e9e5b68, 0x3e83defa,
0x3f13f3d7, 0x3e9b44c8, 0x3eb6326a, 0x3f1e320d, 0x3f345313, 0x3f402f2a,
0x3e436ad0, 0x3f6b6267, 0x3f77bf4f, 0x3e25b374, 0x3f082df3, 0x3f0518ce,
0x3ed9cd10, 0x3e0e79bc, 0x3f375186, 0x3ef78588, 0x3f65373c, 0x3e0a1c48,
0x3efc3e56, 0x3e147014, 0x3f4c0eb4, 0x3ee02dc8, 0x3eb077a8, 0x3f6f37d5,
0x3efe9988, 0x3f6641c7, 0x3f6e233b, 0x3f5a3c19, 0x3e558d0c, 0x3f5cc5c2,
0x3eb8a8ba, 0x3f2cc5d1, 0x3f3101a7, 0x3de0c6e8, 0x3f54fef7, 0x3d95c008,
0x3e788eec, 0x3f1b67dd, 0x3f222409, 0x3f712da8, 0x3e67405c, 0x3f31b7d3,
0x3f4fd102, 0x3efd046c, 0x3efca6b0, 0x3f6d19a5, 0x3df793e8, 0x3daa5070,
0x3f7702cb, 0x3f506ab7, 0x3f27a076, 0x3f55bb1e, 0x3f72bd64, 0x3f27c240,
0x3d93cf20, 0x3f10d198, 0x3f2cc756, 0x3d59c870, 0x3f111e75, 0x3e4daff0,
0x3f0c06ad, 0x3f5da51d, 0x3e9dc660, 0x3eceaa4c, 0x3f58d67a, 0x3f44e0b4,
0x3daa4100, 0x3f47d049, 0x3e9400d8, 0x3e188cdc, 0x3e4b3d50, 0x3e8a3384,
0x3f3cbc6c, 0x3d554a90, 0x3edda170, 0x3ef5c6f4, 0x3ef7e32c, 0x3f468a31,
0x3d316110, 0x3f3b6d31, 0x3f70fabc, 0x3e7612bc, 0x3ec31408, 0x3f3676ac,
0x3e686450, 0x3ed8430e, 0x3f4ff828, 0x3d7e8100, 0x3e3bea48, 0x3de5ed28,
0x3d933b70, 0x3f65a8ef, 0x3c2b2400, 0x3e96fb7e, 0x3e4e3ed4, 0x3e4e905c,
0x3f6ef198, 0x3f0a7cf9, 0x3f3bd186, 0x3e02fcf4, 0x3f4f5b14, 0x3f11f976,
0x3c8567e0, 0x3e437f3c, 0x3d927f70, 0x3f436845, 0x3d3c3f30, 0x3e2e7e40,
0x3f2f5336, 0x3ed60ba6, 0x3f585907, 0x3ea42d1c, 0x3f53a5c3, 0x3ef1d578,
0x3d12f280, 0x3ea58648, 0x3f1db202, 0x3ed896d0, 0x3f1a69a7, 0x3e4895f8,
0x3e88f8de, 0x3ebae986, 0x3f2cacb6, 0x3e8b6300, 0x3f097243, 0x3d6d6890,
0x3f36aa21, 0x3dda6810, 0x3f5d5525, 0x3f773125, 0x3f08d4c5, 0x3ebb9654,
0x3f144a20, 0x3dba7120, 0x3f7e3683, 0x3ec8aa32, 0x3f69c524, 0x3f7356b9,
0x3d080870, 0x3f6afdd4, 0x3f797221, 0x3f6ee080, 0x3f174546, 0x3f63de2d,
0x3f0e2ba6, 0x3e8caba8, 0x3f7ed398, 0x3f1693d5, 0x3e2617fc, 0x3f4fb2de,
0x3e3aa900, 0x3f2a6432, 0x3e440a70, 0x3f7adf8b, 0x3eaff642, 0x3f33e8a1,
0x3e256498, 0x3e0dcc58, 0x3f38da84, 0x3e36d164, 0x3ea298a6, 0x3ebdbe6e,
0x3f4a9e9b, 0x3f3166c6, 0x3f403ca9, 0x3ee83b92, 0x3e6ec430, 0x3d567790,
0x3ee04c4c, 0x3f327eec, 0x3f7fb102, 0x3f39927d, 0x3ef05162, 0x3f18137c,
0x3f2d0df7, 0x3f4fd610, 0x3ee98a1e, 0x3dcb7bf0, 0x3f20d328, 0x3f3b9016,
0x3ad8fc00, 0x3f33401a, 0x3f08ff04, 0x3e879e7c, 0x3ebfabca, 0x3f062c57,
0x3f252797, 0x3c24c100, 0x3f5e3746, 0x3f5e2942, 0x3f04073d, 0x3eb16d70,
0x3f2a5fdf, 0x3f4eabd0, 0x3f45c239, 0x3f7d2f3a, 0x3f11bbdd, 0x3f0037c2,
0x3f0786a2, 0x3e909d36, 0x3d8ab390, 0x3ed8ae20, 0x3e2f5364, 0x3f24b350,
0x3f5e11ee, 0x3f4ecc19, 0x3f162952, 0x3ea0adbc, 0x3ee0edc0, 0x3f2e20f3,
0x3c141e80, 0x3e2d0eb4, 0x3e99b296, 0x3ea387f0, 0x3f045ce5, 0x3c0aea80,
0x3f025ea1, 0x3e8d1594, 0x3deaace0, 0x3f01ec3a, 0x3e798b40, 0x3f6190f6,
0x3ec02984, 0x3e9da7c8, 0x3f6e0478, 0x3cb27220, 0x3f423983, 0x3f17954b,
0x3cad7400, 0x3f04289c, 0x3ef04050, 0x3e41f8e8, 0x3e596930, 0x3eaf7e62,
0x3f2d03bc, 0x3f675d3c, 0x3f6acb66, 0x3f204dd4, 0x3f42f7f0, 0x3f5d68e3,
0x3eb8faa2, 0x3e9e208e, 0x3db43448, 0x3f7c3108, 0x3f06eb2f, 0x3df76fd0,
0x3f726839, 0x3f788afa, 0x3d2df150, 0x3f73f521, 0x3ee9f9fa, 0x3ecbcd28,
0x3e4d208c, 0x3dacc190, 0x3e50ee64, 0x3dbe8f88, 0x3f7b7970, 0x3f366db3,
0x3f69a598, 0x3ee6fcec, 0x3f56d6e5, 0x3f4b45d8, 0x3e218a18, 0x3ea5a320,
0x3f0a59ca, 0x3f1281a9, 0x3eead684, 0x3e2287f8, 0x3f002c55, 0x3f06f0e7,
0x3f7185a6, 0x3f563dfe, 0x3eda6c3e, 0x3f50510a, 0x3e0bd720, 0x3f364ccc,
0x3f6cb83c, 0x3e5c546c, 0x3f7160a0, 0x3f7a25d1, 0x3f2dc4c6, 0x3ddf1050,
0x3eb86014, 0x3f307988, 0x3e60b364, 0x3f2b216f, 0x3f170233, 0x3e90a560,
0x3f1fb4bb, 0x3f5c6307, 0x3f76e518, 0x3f579f62, 0x3eb86790, 0x3ef9d3ca,
0x3e99df66, 0x3e3fb484, 0x3ee57db0, 0x3de866c8, 0x3f0339c6, 0x3f32e51d,
0x3e46e738, 0x3f510500, 0x3f7cd665, 0x3f33a5da, 0x3e65e29c, 0x3f6270c5,
0x3f63d846, 0x3f48314f, 0x3cf30d40, 0x3e908352, 0x3f2e7409, 0x3dd3d868,
0x3ea48c34, 0x3c82db60, 0x3f21bf14, 0x3f756025, 0x3f45cae6, 0x3f5cf345,
0x3f564efa, 0x3f580521, 0x3f1dd566, 0x3d264190, 0x3e8df1a4, 0x3c2d0b40,
0x3eeb6ce2, 0x3f2e578b, 0x3f50245b, 0x3dbb7e00, 0x3f4343d9, 0x3f300078,
0x3f4ee399, 0x3e64e4bc, 0x3f28ffa4, 0x3f5ca694, 0x3e6593f0, 0x3f536710,
0x3eb0528a, 0x3ea7e840, 0x3f54e1d4, 0x3f28760a, 0x3f463c53, 0x3f1f4ded,
0x3f604425, 0x3f270acd, 0x3ee33c98, 0x3ddcf9e8, 0x3d8e3fe8, 0x3ebb1ca4,
0x3f0e38a2, 0x3ec5ac5e, 0x3f250d7a, 0x3ef72cd2, 0x3dbe4268, 0x3f3b1d04,
0x3f3f0582, 0x3e219e5c, 0x3ea6702e, 0x3f6beed3, 0x3f4c4630, 0x3eab05c0,
0x3f1085b2, 0x3f287fb3, 0x3f1cbb42, 0x3f0407ce, 0x3e474f60, 0x3f461279,
0x3ef0d36c, 0x3f26d8ef, 0x3e0f9860, 0x3eb7626a, 0x3eaa6ee0, 0x3d3ad180,
0x3ee73df2, 0x3f4fa2c1, 0x3f3fd545, 0x3f6f0e09, 0x3ec0cb9a, 0x3dd1c540,
0x3f0e4267, 0x3f7a8993, 0x3f71f72a, 0x3f13f715, 0x3f2934e9, 0x3f4d1b2b,
0x3f73b580, 0x3f24d9a9, 0x3f46160d, 0x3f005880, 0x3e28e334, 0x3f596991,
0x3e0d9ba4, 0x3e5bb108, 0x3e2d0298, 0x3ec001c8, 0x3e44075c, 0x3f02abe7,
0x3e48b7e0, 0x3f48f140, 0x3e371784, 0x3f724e6e, 0x3f63981a, 0x3f39aaa9,
0x3d023890, 0x3f0754cc, 0x3f341bd4, 0x3f16fd77, 0x3e4caa04, 0x3e423cf8,
0x3f6cc610, 0x3f16c03a, 0x3f6f8276, 0x3e9ad074, 0x3ec44f62, 0x3f185439,
0x3e572f90, 0x3f09432a, 0x397ed000, 0x3d006030, 0x3f6cce05, 0x3e6ffa90,
0x3e89707c, 0x3ec25468, 0x3d4c4850, 0x3f5e0be1, 0x3f21058c, 0x3f53870b,
0x3dcf7c00, 0x3eb316a2, 0x3e1fd5ec, 0x3ee94d58, 0x3f2a5bc7, 0x3f140db9,
0x3ecfbdf8, 0x3f7adedf, 0x3eb7adc8, 0x3f46abdb, 0x3f67a77e, 0x3f5de512,
0x3f071c08, 0x3df25a78, 0x3e4e1280, 0x3f38d7fc, 0x3f3bbb2d, 0x3f6b2c21,
};
// 2,3,5,5
uint32_t kernel_vals[] = {
0x3e1010ca, 0xbd9de6bf, 0xbdb40e03, 0x3d07c460, 0x3dafaf78, 0xbccd1bd8,
0xbe1e0e30, 0xbaea0500, 0x3e05b604, 0x3c917808, 0x3e34318a, 0x3cf2cc70,
0x3dba5928, 0xbdef8b99, 0x3deeaf4c, 0x3e0999e4, 0xbcd53068, 0xbd1ff43c,
0x3e1fe34a, 0x3cfddc80, 0x3d891584, 0x3e2412a0, 0xbe18e584, 0x3e276c82,
0x3c90c168, 0xbdeb852c, 0x3e2ee1e8, 0xbe1c7d4b, 0xbe019dbd, 0x3bad51c0,
0x3e0acf02, 0x3cb43390, 0xbd828287, 0x3d279350, 0x3d8108b0, 0xbdb5cade,
0xbe291e11, 0xbddf82e9, 0xbda02529, 0xbd61ae8c, 0xbd8f694b, 0xbd358f20,
0xbe181e84, 0x3df22f4c, 0xbe364531, 0xbe1026d4, 0xbdf32df6, 0xbe0e3a40,
0x3e17ef54, 0xbca39b48, 0xbd9493c8, 0xbe342924, 0x3b92be80, 0xbdb81fab,
0x3d306e5c, 0xbd9226b0, 0x3e38f870, 0x3e16adbc, 0x3e12bdde, 0xbe2336c5,
0xbd4dc424, 0x3db7f748, 0x3d839a8a, 0xbe34d672, 0x3e1c0870, 0xbdc20690,
0xbe2b2f41, 0x3d374784, 0x3d9a9b28, 0xbe2067f5, 0x3e29e600, 0xbd91ac51,
0xbdc2de89, 0x3d43c5a8, 0x3e1d16f6, 0xbe0068ff, 0x3c4c4b20, 0x3e25828c,
0x3e298766, 0xbdf0dabf, 0xbdcca37b, 0xbe3720f9, 0xbdc71d63, 0xbe24b4d1,
0xbda01e6d, 0x3da88708, 0xbd25e58c, 0xbd84a85c, 0x3e0154de, 0xbd10a28c,
0xbcc18120, 0xbd9ed2bc, 0xbd44b5dc, 0x3e36fabe, 0xbdef4e3b, 0xbe31bfbd,
0xbcc802e0, 0x3db30ed0, 0xbc7a8200, 0x3e372460, 0x3d71c758, 0x3def98d4,
0x3d3826b0, 0xbdc5af54, 0xbd8cc9db, 0xbe04e70c, 0x3d36a850, 0xbe2cf434,
0x3dbae5f0, 0xbb9842a0, 0xbe0f969d, 0x3ddf7740, 0xbcb578d0, 0xbd0f7e4c,
0x3d916ec0, 0xbcd51e38, 0x3bc6af20, 0x3e0ef404, 0x3e28d4d4, 0x3d515bc4,
0x3d7d46cc, 0xbe177230, 0xbdf16a23, 0x3be38380, 0x3d9ecbcc, 0x3b974ce0,
0x3e272334, 0x3e1a3306, 0xbd79275e, 0x3e0c2950, 0xbdbe384a, 0xbdea4848,
0x3d23abb8, 0xbce4c568, 0xbc9cf250, 0xbde2f2f6, 0xbb33ed40, 0xbd2594e8,
0xbe375192, 0xbe039f4b, 0x3d08aae0, 0xbe10a348, 0x3cf9a400, 0x3e2eb8a6,
0x3c67d8b0, 0xbd5300d4, 0xbabd1200, 0xbdab0df9, 0xbd9b63b2, 0x3e14ad7a,
};
// 5
uint32_t bias_vals[] = {
0xbc13d600, 0xbd897640, 0x3d116244, 0x3df5dc48, 0xbce727a0,
};
// 3,5,3,5
uint32_t output_exp_vals[] = {
0xbee9b3fc, 0xbf17b60f, 0xbe484f39, 0x3f0961db, 0x3d4c1743, 0xbf2531ad,
0xbe9e12fb, 0xbd2cb655, 0x3e7577b7, 0x3e57e7a6, 0xbddbe7ef, 0xbd9c38f2,
0xbe8bdcb8, 0x3f07b811, 0x3e993fa2, 0xbee4d216, 0xbed2d3c1, 0xbf018cf6,
0x3e9ab29a, 0xbc846189, 0xbe2a2151, 0xbf0cf26c, 0xbe3d15aa, 0x3ec875d1,
0xbce80543, 0xbd2144f7, 0xbea62a1f, 0xbf0a4bb7, 0x3f1e16ea, 0xbe6af02a,
0xbefc6588, 0xbed44ade, 0xbf129453, 0x3ee895e4, 0xbdc1fd8c, 0xbec8491e,
0xbe75412e, 0xbdec8108, 0x3f1ac5f4, 0x3dbea2f0, 0xbdfc22ef, 0xbe71cce2,
0xbeee18a0, 0x3f0d220f, 0xbd4cce24, 0xbe78ddf6, 0xbec93b9a, 0xbf06ee8e,
0x3f03fe72, 0x3e3ac3ba, 0xbf12738d, 0xbf2febbf, 0xbebc542d, 0x3e8172af,
0xbd330054, 0xbeb90581, 0xbec3ecc7, 0xbf310a62, 0x3f4cf144, 0xbe7f9437,
0xbe86389f, 0xbec82670, 0xbe71353a, 0x3f0802ad, 0x3df16c7b, 0xbeab9b58,
0xbecb53ec, 0xbee876ac, 0x3f218016, 0x3e0a5d6e, 0xbe80e3b4, 0xbe6a7756,
0xbea33ab7, 0x3f3949a0, 0xbe2f42e1, 0xbe3941bb, 0xbddc8acb, 0xbecacf4e,
0x3ef3e4ce, 0x3eb546d3, 0xbee8c535, 0xbee112e6, 0xbe5fdadc, 0x3f58da9c,
0x3dc93527, 0xbdb5e1ed, 0xbe3e8ebb, 0xbec5b836, 0x3ec7d54a, 0x3d819f14,
0xbf0426fb, 0xbefe0525, 0xbe8f9be7, 0x3f4a54bd, 0x3dae38c1, 0xbf211377,
0xbea54527, 0xbf0947c1, 0x3db8822c, 0xbb87d99d, 0xbe25102c, 0xbe8a8a9d,
0xbf0bf3db, 0x3f5a6b25, 0xbe0e5dfa, 0xbdfbbc30, 0xbc731067, 0xbea56ac8,
0x3e765c6e, 0x3f01f8c7, 0xbea4492e, 0xbf227ee2, 0xbe21160c, 0x3e457e8b,
0x3eaed0ce, 0xbd9b51a3, 0xbeaefc45, 0xbeade1a5, 0x3efa07f4, 0x3c93b7b0,
0xbf0bc708, 0xbe736c16, 0xbebb2c04, 0x3f4cffa1, 0xbe6ba6d8, 0xbe6e2e8f,
0xbefa7f52, 0xbeb2ea70, 0x3f2b9797, 0xbdc01028, 0xbc79e4b4, 0xbe13949a,
0xbeb5c4d9, 0x3ebf20b2, 0x3bd54764, 0xbf0d56cd, 0xbee63f7f, 0xbde73ed3,
0x3ed4308f, 0x3dfa5af5, 0xbf22f3df, 0xbe1374eb, 0xbe34d7fa, 0x3e1a3b6d,
0x3e1f64b5, 0xbde3cde2, 0xbe300e8a, 0xbe04bcd1, 0x3f315c43, 0x3d585df5,
0xbec3da8a, 0xbe56fc4e, 0xbf353dcf, 0x3ebcad9c, 0x3c2c3339, 0xbe66802a,
0xbef105fa, 0xbee068fa, 0x3eb0b458, 0xba9b0dc8, 0xbd473b6e, 0xbe8544e8,
0xbebb0b5d, 0x3f0ce772, 0x3e0fecd2, 0xbf1b6e40, 0xbd1dd2cd, 0xbe6f7d6c,
0x3d20971f, 0x3e0530f4, 0xbc98b77a, 0xbeee6e72, 0xbef41f95, 0x3ef63aa7,
0x3ee45d03, 0xbea14327, 0xbef5e1a9, 0xbee27363, 0x3f1ee832, 0xbe403bbc,
0xbe42f7ae, 0xbe932cb8, 0xbeb69872, 0x3ebeca24, 0xbd155237, 0xbecb8a26,
0xbead579a, 0xbec12b5a, 0xbcecc13d, 0xbb50feab, 0xbd660f71, 0xbe938724,
0xbf0ee147, 0x3efae9af, 0x3e55edd3, 0xbea91e8f, 0xbed6ee3b, 0xbf095e37,
0x3e9da89b, 0x3e994dee, 0xbdcc19ee, 0xbed770c0, 0xbe91cccc, 0x3f06de06,
0xbde3ada4, 0xbd40085a, 0xbd522e19, 0xbe92716c, 0x3f4a1856, 0x3e3a5b61,
0xbf04a036, 0xbeae4b75, 0xbe462a7d, 0x3f07781f, 0x3e5d9f05, 0xbe83bdf3,
0xbeb8640d, 0xbf061386, 0x3e8624aa, 0x3eba5fd0, 0xbe5c435e, 0xbedc4cac,
0xbf00a447, 0x3f118d65, 0x3d7c81bf,
};
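// Expected outputs after applying ReLU to the conv2d results above:
// entries with a negative expected value (sign bit set) appear as 0x0 here,
// positive entries are carried over unchanged.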
// 3,5,3,5
uint32_t output_relu_exp_vals[] = {
0x0, 0x0, 0x0, 0x3f0961db, 0x3d4c1743, 0x0,
0x0, 0x0, 0x3e7577b7, 0x3e57e7a6, 0x0, 0x0,
0x0, 0x3f07b811, 0x3e993fa2, 0x0, 0x0, 0x0,
0x3e9ab29a, 0x0, 0x0, 0x0, 0x0, 0x3ec875d1,
0x0, 0x0, 0x0, 0x0, 0x3f1e16ea, 0x0,
0x0, 0x0, 0x0, 0x3ee895e4, 0x0, 0x0,
0x0, 0x0, 0x3f1ac5f4, 0x3dbea2f0, 0x0, 0x0,
0x0, 0x3f0d220f, 0x0, 0x0, 0x0, 0x0,
0x3f03fe72, 0x3e3ac3ba, 0x0, 0x0, 0x0, 0x3e8172af,
0x0, 0x0, 0x0, 0x0, 0x3f4cf144, 0x0,
0x0, 0x0, 0x0, 0x3f0802ad, 0x3df16c7b, 0x0,
0x0, 0x0, 0x3f218016, 0x3e0a5d6e, 0x0, 0x0,
0x0, 0x3f3949a0, 0x0, 0x0, 0x0, 0x0,
0x3ef3e4ce, 0x3eb546d3, 0x0, 0x0, 0x0, 0x3f58da9c,
0x3dc93527, 0x0, 0x0, 0x0, 0x3ec7d54a, 0x3d819f14,
0x0, 0x0, 0x0, 0x3f4a54bd, 0x3dae38c1, 0x0,
0x0, 0x0, 0x3db8822c, 0x0, 0x0, 0x0,
0x0, 0x3f5a6b25, 0x0, 0x0, 0x0, 0x0,
0x3e765c6e, 0x3f01f8c7, 0x0, 0x0, 0x0, 0x3e457e8b,
0x3eaed0ce, 0x0, 0x0, 0x0, 0x3efa07f4, 0x3c93b7b0,
0x0, 0x0, 0x0, 0x3f4cffa1, 0x0, 0x0,
0x0, 0x0, 0x3f2b9797, 0x0, 0x0, 0x0,
0x0, 0x3ebf20b2, 0x3bd54764, 0x0, 0x0, 0x0,
0x3ed4308f, 0x3dfa5af5, 0x0, 0x0, 0x0, 0x3e1a3b6d,
0x3e1f64b5, 0x0, 0x0, 0x0, 0x3f315c43, 0x3d585df5,
0x0, 0x0, 0x0, 0x3ebcad9c, 0x3c2c3339, 0x0,
0x0, 0x0, 0x3eb0b458, 0x0, 0x0, 0x0,
0x0, 0x3f0ce772, 0x3e0fecd2, 0x0, 0x0, 0x0,
0x3d20971f, 0x3e0530f4, 0x0, 0x0, 0x0, 0x3ef63aa7,
0x3ee45d03, 0x0, 0x0, 0x0, 0x3f1ee832, 0x0,
0x0, 0x0, 0x0, 0x3ebeca24, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x3efae9af, 0x3e55edd3, 0x0, 0x0, 0x0,
0x3e9da89b, 0x3e994dee, 0x0, 0x0, 0x0, 0x3f06de06,
0x0, 0x0, 0x0, 0x0, 0x3f4a1856, 0x3e3a5b61,
0x0, 0x0, 0x0, 0x3f07781f, 0x3e5d9f05, 0x0,
0x0, 0x0, 0x3e8624aa, 0x3eba5fd0, 0x0, 0x0,
0x0, 0x3f118d65, 0x3d7c81bf,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, SAME_PADDING, NULL);
}
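// Conv2d test case: VALID padding with non-zero strides on the large input
// set (input dims 4,15,10,6; kernel dims 4,6,6,7 per the shape comments below).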
void test_valid_padding_non_zero_strides_large() {
input_set *set = &large_input;
strides_input_set *strides = &large_non0_strides;
// 4,15,10,6
uint32_t input_vals[] = {
0x3efb1fe6, 0x3f04c7af, 0x3e09e1c4, 0x3f0c288a, 0x3ec9f8a8, 0x3f69e8f8,
0x3e3ad78c, 0x3f6d04b6, 0x3f5c45b3, 0x3ece8426, 0x3f47fb5c, 0x3ebe3d66,
0x3f101556, 0x3f09cdc8, 0x3f6706c4, 0x3e0f1eb4, 0x3ec8bc90, 0x3dbd7ff8,
0x3b17de00, 0x3e931fc2, 0x3e7470b4, 0x3f62874f, 0x3d194080, 0x3f54f750,
0x3e43d79c, 0x3e659be8, 0x3f6dd296, 0x3f0ce854, 0x3eec83a2, 0x3e3a5c7c,
0x3dbfdce8, 0x3f14bd6f, 0x3eb2de3c, 0x3e895b6a, 0x3f64c7d1, 0x3bbf4400,
0x3f069316, 0x3eaa5b92, 0x3c677f40, 0x3f524fcf, 0x3e5915f0, 0x3eea46d2,
0x3e66215c, 0x3f40b78b, 0x3ed69440, 0x3f2d50b6, 0x3f6bdec5, 0x3f7b4367,
0x3efa6a62, 0x3da8c580, 0x3dfb13b8, 0x3de07918, 0x3f473fdf, 0x3df834f8,
0x3df558d0, 0x3e27efbc, 0x3f1ab25f, 0x3ed5af5e, 0x3ee2c9ba, 0x3f1bbf99,
0x3d8dc3b0, 0x3f6fa25c, 0x3f18bc2d, 0x3d8db638, 0x3ef80f76, 0x3de52090,
0x3ee26e70, 0x3e9ce620, 0x3e8f10c8, 0x3e62ada0, 0x3f6a02d0, 0x3f3457d3,
0x3f0f148b, 0x3bec7800, 0x3eb62f06, 0x3f17133c, 0x3f0211b9, 0x3f56b35c,
0x3f5c1b82, 0x3f5b93c2, 0x3f25babc, 0x3f237624, 0x3f115d74, 0x3ed65682,
0x3ea877aa, 0x3eba8990, 0x3e67ae8c, 0x3e9cfce2, 0x3f0a8356, 0x3d611b90,
0x3f79c40c, 0x3f00c1a1, 0x3f34a013, 0x3ea5df38, 0x3f09a2e3, 0x3e016568,
0x3e11df40, 0x3f4f4e7b, 0x3e04d988, 0x3d958d60, 0x3d873190, 0x3e4571cc,
0x3f2c5002, 0x3f2abab0, 0x3f4ace91, 0x3e4378b8, 0x3e584d8c, 0x3ea0633c,
0x3d4d85b0, 0x3ca631c0, 0x3e8631d8, 0x3eefbb12, 0x3f762250, 0x3e85832a,
0x3f7392cc, 0x3f16f6a8, 0x3da94818, 0x3f049964, 0x3f4c4075, 0x3f385374,
0x3e9f60ea, 0x3f7c0541, 0x3f188ebc, 0x3f480457, 0x3f06a99b, 0x3f1cdb4b,
0x3f471879, 0x3ef1355c, 0x3f0d7dc8, 0x3f5508a3, 0x3f78fa60, 0x3dcc6918,
0x3f062a79, 0x3f5d14df, 0x3e05e104, 0x3f6fb45c, 0x3e9c0a5e, 0x3f1047e2,
0x3e990dd2, 0x3f702316, 0x3f70868f, 0x3f18e412, 0x3f36c1c2, 0x3d1567d0,
0x3ec2f846, 0x3f2f2bed, 0x3ea3a6d2, 0x3e052938, 0x3e74002c, 0x3f5bacb6,
0x3ea05d00, 0x3d1b0c00, 0x3f545908, 0x3eb79dce, 0x3f62f452, 0x3eee4680,
0x3f4e453d, 0x3f6dcecc, 0x3ebde2f0, 0x3d75c940, 0x3f4690d8, 0x3f77b2f9,
0x3f5d7c4d, 0x3f6518a8, 0x3f559934, 0x3eec5048, 0x3f2c1e18, 0x3f63fa3f,
0x3f79d1ff, 0x3f265ef8, 0x3e84972c, 0x3f430748, 0x3ef0b100, 0x3e5a6b60,
0x3ec31cca, 0x3eeaebfe, 0x3e1c2028, 0x3f0ea570, 0x3f57326d, 0x3f6310ae,
0x3eb2d324, 0x3e1f5700, 0x3dbc2160, 0x3f3a7367, 0x3f459dbc, 0x3d951300,
0x3e7efe8c, 0x3f1c37dc, 0x3e7aa230, 0x3f6c120d, 0x3f4234c8, 0x3d168a80,
0x3eee7c14, 0x3ea7113e, 0x3e68b00c, 0x3f4297f7, 0x3ea0b0ae, 0x3f55be3a,
0x3f1ef513, 0x3f72b43c, 0x3f3a6516, 0x3ecac4d6, 0x3f039201, 0x3e794510,
0x3eeaedd8, 0x3acafc00, 0x3e2e0a30, 0x3f749de8, 0x3ec2e9f6, 0x3e08f674,
0x3e352a6c, 0x3f1b991c, 0x3ed49d32, 0x3f46f515, 0x3e550574, 0x3e3330cc,
0x3f097109, 0x3efa98f8, 0x3ea2fd6a, 0x3e0404ec, 0x3f1f102f, 0x3e58d0d8,
0x3ef864e0, 0x3f0055f8, 0x3e9f58b4, 0x3f1264ef, 0x3f6e1e66, 0x3d6e0380,
0x3dcced30, 0x3ee02d72, 0x3ee0a450, 0x3f3dbcbd, 0x3f4b58e6, 0x3f106ff4,
0x3eba5ef4, 0x3f362c53, 0x3f46a0a9, 0x3f290de0, 0x3eb889d8, 0x3d91fa90,
0x3ef58618, 0x3d90bf98, 0x3e966d00, 0x3f7b3920, 0x3f492bf2, 0x3eb87c68,
0x3c6bbbc0, 0x3f1b8c60, 0x3da5f5b8, 0x3e300078, 0x3eed0364, 0x3f50a465,
0x3f11cce6, 0x3e2b11a8, 0x3f57243b, 0x3f120649, 0x3f7923d1, 0x3dea6f38,
0x3f2325f1, 0x3f28b92b, 0x3e567b98, 0x3f7c185b, 0x3bac6980, 0x3f570842,
0x3ee800cc, 0x3eea43ee, 0x3f5854b4, 0x3e3ee6f4, 0x3f22787d, 0x3f7eb373,
0x3edca08a, 0x3f103e44, 0x3ee5f136, 0x3e6c3e70, 0x3d6f6d90, 0x3ed90442,
0x3f031156, 0x3f482df2, 0x3ee1bc50, 0x3f56bb38, 0x3f3cfe7e, 0x3ee127b2,
0x3f20a03e, 0x3e9a35a2, 0x3f780de4, 0x3f407beb, 0x3f5bf3c5, 0x3cc97360,
0x3e855818, 0x3dc7e2e8, 0x3f170ad5, 0x3f64bb17, 0x3f6221fb, 0x3f76cb6d,
0x3ee82c84, 0x3e8893cc, 0x3d434150, 0x3ed38ab8, 0x3eac05dc, 0x3f668dd6,
0x3f49803b, 0x3f68561a, 0x3ca9b8c0, 0x3f4bdfb8, 0x3f0b4755, 0x3ee48e54,
0x3eb4ef00, 0x3dbe76c8, 0x3d0444c0, 0x3d8fea10, 0x3f218222, 0x3f4c3771,
0x3effa3e2, 0x3f512bd2, 0x3e062a1c, 0x3e58cc50, 0x3ea83bc2, 0x3d744060,
0x3d19c180, 0x3ea59b66, 0x3f2e4483, 0x3f148556, 0x3f77eb26, 0x3e256ed4,
0x3df48b50, 0x3f06bb54, 0x3f3529f8, 0x3e14a980, 0x3f57a6f2, 0x3f78465d,
0x3ee75dba, 0x3f346d2f, 0x3f4253ca, 0x3efb970e, 0x3f3f5b7f, 0x3b682000,
0x3ea78bb8, 0x3ea5be72, 0x3d2381b0, 0x3ea07b70, 0x3e97bbb6, 0x3ee4fece,
0x3f7c73e0, 0x3e1f50a4, 0x3f4ceab9, 0x3f7e8915, 0x3f598169, 0x3e6ef65c,
0x3ec93908, 0x3f2b51c7, 0x3f7178f1, 0x3eb384a8, 0x3e7b2f94, 0x3ce514a0,
0x3da97e40, 0x3f4d4ce2, 0x3f323df1, 0x3f2c8b93, 0x3dbe1988, 0x3f15068a,
0x3f6d2592, 0x3f2fb19c, 0x3e9b12b0, 0x3f189359, 0x3eafd3f8, 0x3e906f12,
0x3e350cb4, 0x3ef71540, 0x3d880588, 0x3f7b54af, 0x3ee86f08, 0x3f193941,
0x3f69f1dc, 0x3f0da5f1, 0x3f7e8cae, 0x3f0155a5, 0x3f50c0d3, 0x3ea06680,
0x3ee6018e, 0x3f35bde6, 0x3f0c8ec9, 0x3f48b858, 0x3f34cb33, 0x3f2a8203,
0x3f192248, 0x3f28ca8c, 0x3f1b99ec, 0x3e248d4c, 0x3f2e2321, 0x3f10ea99,
0x3c1ee400, 0x3e811cb0, 0x3f04d7ec, 0x3e0ea6dc, 0x3eb266aa, 0x3b58bd00,
0x3f742656, 0x3f5c446e, 0x3f55f024, 0x3f4242aa, 0x3f635c84, 0x3f519668,
0x3efd1ed4, 0x3eddbbdc, 0x3c0c8200, 0x3f4f860d, 0x3f7a2db3, 0x3f4f22cc,
0x3e7f413c, 0x3e9b018e, 0x3f40b589, 0x3e7cb6b0, 0x3d8db6c8, 0x3df00618,
0x3e129dfc, 0x3eb2c392, 0x3e24078c, 0x3f668a5b, 0x3f732b7d, 0x3f2626e2,
0x3f174914, 0x3f10ffbc, 0x3f76b981, 0x3f2a3556, 0x3f1918f3, 0x3e86c1b6,
0x3ce4c600, 0x3e77149c, 0x3de0d530, 0x3e5dd56c, 0x3f2ca047, 0x3f57ec31,
0x3f5ee5d5, 0x3e0a74b4, 0x3ec78b28, 0x3f3dba83, 0x3f3205b8, 0x3e84e402,
0x3f462657, 0x3f4760a1, 0x3f2a4c2d, 0x3f7f45b9, 0x3f4a967e, 0x3f504b08,
0x3f6ef1e1, 0x3f68745e, 0x3f2a8f59, 0x3ebe6142, 0x3f01f724, 0x3e3b4198,
0x3f71f942, 0x3b47e300, 0x3f2ad0ac, 0x3f636b1d, 0x3ebe1466, 0x3e6c4184,
0x3e5258c0, 0x3f65dc3c, 0x3f052a59, 0x3eb0faaa, 0x3ea7c1be, 0x3f34196b,
0x3ebaadb4, 0x3e531e88, 0x3f2323c5, 0x3cfb8840, 0x3e3760f4, 0x3f22e05f,
0x3f4e688b, 0x3f1e3be7, 0x3e276500, 0x3dd6e130, 0x3ef6257a, 0x3e8b053a,
0x3f51e898, 0x3f7675c2, 0x3ef0dffc, 0x3ea9eb30, 0x3e6095bc, 0x3f5e1e30,
0x3f7d498f, 0x3e3fd274, 0x3f23b067, 0x3f7910d1, 0x3f4c62c2, 0x3f1854bc,
0x3e9636de, 0x3e96bef0, 0x3f66ad3a, 0x3f35ca4d, 0x3e116ee4, 0x3f3d2efc,
0x3f126699, 0x3f24e4a7, 0x3dbe5e30, 0x3f6e5dc9, 0x3f6f4499, 0x3e5065b4,
0x3ef74efe, 0x3e3fbcfc, 0x3e9af28c, 0x3f40dee9, 0x3d6f78c0, 0x3f1ef8cd,
0x3f49b785, 0x3e9c5d2c, 0x3e28c99c, 0x3efc5c32, 0x3f6a96f4, 0x3f1eefce,
0x3e82942c, 0x3da59718, 0x3e776bc8, 0x3e2db094, 0x3f0e7fbd, 0x3f4a7e9c,
0x3f73e136, 0x3f0ef035, 0x3e54c844, 0x3d3ab0e0, 0x3ceaef20, 0x3f761c50,
0x3eb43666, 0x3f4feac0, 0x3ed8ee16, 0x3eaa473a, 0x3f26617d, 0x3f5126e3,
0x3f3f94a2, 0x3f22481a, 0x3f2e4011, 0x3e8ef2de, 0x3e365e14, 0x3f7dd058,
0x3f60949d, 0x3ed27088, 0x3ece364a, 0x3f733387, 0x3dd4d518, 0x3e8fd25a,
0x3a223000, 0x3f7955f5, 0x3f19a65e, 0x3ec22a74, 0x3f06bcde, 0x3db46c30,
0x3f17d92c, 0x3f63e82b, 0x3ec37972, 0x3ee10f36, 0x3eb729e8, 0x3f16a912,
0x3e20ece8, 0x3e6976bc, 0x3c9a7e40, 0x3f17379c, 0x3f1f96b4, 0x3efb6a70,
0x3f3d9f7c, 0x3f493889, 0x3f104771, 0x3f691356, 0x3f653a89, 0x3ed9e542,
0x3f6d9d14, 0x3ec5448c, 0x3f059ac9, 0x3ee5a888, 0x3d29c310, 0x3dc77fe0,
0x3eaea27e, 0x3f29479d, 0x3e244c30, 0x3f431fc3, 0x3f7f4ff3, 0x3ef71410,
0x3e250eb8, 0x3f0de805, 0x3c97f220, 0x3e1ec460, 0x3e641744, 0x3f03c34e,
0x3e7bc584, 0x3f380601, 0x3f1f9a7e, 0x3cc19ce0, 0x3ecd6970, 0x3f4e7492,
0x3f63b24b, 0x3e7b8df0, 0x3eb98e48, 0x3ed8b934, 0x3f52f89d, 0x3f3d69e9,
0x3ed6865c, 0x3e849cac, 0x3f19a2a0, 0x3f3aa4fa, 0x3e951196, 0x3f15a2bb,
0x3e02e578, 0x3e5c0068, 0x3ec80afe, 0x3f502e89, 0x3e7df2f4, 0x3f5a0e27,
0x3f68c46e, 0x3e347d74, 0x3ec0f706, 0x3f2eabbb, 0x3ec41b44, 0x3ee48d30,
0x3f53eaf8, 0x3e919d0a, 0x3e9953f2, 0x3f610369, 0x3f3207eb, 0x3f66f78c,
0x3ee44a16, 0x3f1eb990, 0x3f7aff76, 0x3e52f3d0, 0x3ef4e9f0, 0x3d386090,
0x3f0ebbc6, 0x3f246f9a, 0x3d5ffcc0, 0x3f1cc8b7, 0x3f749b7a, 0x3ef87be0,
0x3f0fb75f, 0x3f18412c, 0x3ec070ea, 0x3f6a7f47, 0x3f3743ab, 0x3e8d5ac2,
0x3f7bb722, 0x3f5f411a, 0x3eba39de, 0x3e431950, 0x3d333210, 0x3f5220ff,
0x3f4a3d29, 0x3f61584e, 0x3ea5e54a, 0x3f58958b, 0x3e0f6de8, 0x3eb87362,
0x3f171095, 0x3f674b58, 0x3f40e5f4, 0x3dc28630, 0x3f0a15fe, 0x3e25d584,
0x3f02daca, 0x3f4faa88, 0x3f71cec7, 0x3f53415c, 0x3ef63138, 0x3ed4d464,
0x3f29385b, 0x3efdfe26, 0x3f6990ee, 0x3f42bb6d, 0x3eb2342c, 0x3f0012d8,
0x3e05c144, 0x3f208fdd, 0x3e836d7a, 0x3f6fd102, 0x3f2a1859, 0x3f56258f,
0x3f4565e6, 0x3e7e2ddc, 0x3e904c0a, 0x3ee4514e, 0x3f35b7db, 0x3f4684ee,
0x3f35c739, 0x3f02253b, 0x3f32d091, 0x3ef1ec72, 0x3e56d044, 0x3d43ca70,
0x3f0ed186, 0x3f09b44b, 0x3f44b512, 0x3e4c63e8, 0x3eccedfe, 0x3e93d28c,
0x3f4a4b8b, 0x3dbac570, 0x3da57090, 0x3f755b9b, 0x3f2dd421, 0x3e83d67a,
0x3ecea17a, 0x3f68c315, 0x3e8beea2, 0x3ef7233e, 0x3f2e6261, 0x3ef28322,
0x3d301700, 0x3ec74384, 0x3e875442, 0x3ebd7746, 0x3ee907e8, 0x3f38df88,
0x3dabbee8, 0x3f61f4bd, 0x3ee71514, 0x3f085c8c, 0x3f405a59, 0x3edac0de,
0x3f78adcd, 0x3f3ccc39, 0x3ea60262, 0x3f428f40, 0x3f2dbdd4, 0x3f7485c1,
0x3eab8edc, 0x3e69e668, 0x3de87048, 0x3e7c4bfc, 0x3cfbaf60, 0x3f0e1a87,
0x3e0a3a4c, 0x3f258435, 0x3ed50534, 0x3dd094b8, 0x3cb680e0, 0x3f46d914,
0x3f7d40f9, 0x3e8f32e0, 0x3ec4d108, 0x3ec1ad0a, 0x3f0448c0, 0x3ec27550,
0x3e16101c, 0x3e81ffd6, 0x3f09d368, 0x3f26a6b7, 0x3f2bd10c, 0x3f4a3448,
0x3f680539, 0x3f28a1c4, 0x3e9e6cb2, 0x3f0ce0c0, 0x3e1303c4, 0x3f323574,
0x3f720290, 0x3f3e9879, 0x3e8880e4, 0x3ebbe646, 0x3e78551c, 0x3f04bc80,
0x3edf7e8c, 0x3e5b8e58, 0x3f7248e5, 0x3f10a57f, 0x3cb5f580, 0x3ef776ee,
0x3f4a88a8, 0x3e7d2548, 0x3eb63a8e, 0x3eeb3014, 0x3f183e57, 0x3f38b629,
0x3f61f7cf, 0x3f60aaa6, 0x3f4c3d61, 0x3f632e9d, 0x3f5cc626, 0x3f121382,
0x3cf217c0, 0x3ea1e382, 0x3e311c44, 0x3e5324fc, 0x3f716802, 0x3e3d3080,
0x3e6df308, 0x3e285a10, 0x3e9105f8, 0x3f7b6fd1, 0x3dbe3dd8, 0x3eb093ac,
0x3f10a60a, 0x3d9ce3e8, 0x3f3415ef, 0x3f013730, 0x3f62d649, 0x3e3025f0,
0x3f282129, 0x3f49fce7, 0x3f1945e7, 0x3f59e7e8, 0x3e99773c, 0x3f1e920a,
0x3e227ea8, 0x3f5c3593, 0x3dfbd380, 0x3f3388d2, 0x3d744be0, 0x3f6007de,
0x3db4cd40, 0x3f7ee35b, 0x3f62f411, 0x3dfdd1d8, 0x3f522532, 0x3e5a0e20,
0x3f7aab9d, 0x3dff2c38, 0x3eba4ecc, 0x3f5bd074, 0x3f6b8e9a, 0x3f2aec15,
0x3e875894, 0x3f0f5970, 0x3effed90, 0x3ea6615e, 0x3ef2dc18, 0x3de973b8,
0x3c495280, 0x3f18e575, 0x3edd24cc, 0x3e279ef4, 0x3eb67724, 0x3f34ba39,
0x3f383a9f, 0x3f0ca9dd, 0x3f61c23a, 0x3f38048a, 0x3f0ffde7, 0x3cff45a0,
0x3e78b944, 0x3f19d762, 0x3f1efc57, 0x3f183493, 0x3f3c0ef1, 0x3e9b7286,
0x3ea6a7f6, 0x3d1c3580, 0x3f3ba3b7, 0x3e7e7b94, 0x3f3b319d, 0x3efd118a,
0x3ea636c6, 0x3c9e6720, 0x3ec85144, 0x3e87b89a, 0x3e7e22e8, 0x3ed49236,
0x3f7627a4, 0x3e95db98, 0x3f7f9ee5, 0x3f6fe8b9, 0x3f0443ee, 0x3f4808da,
0x3f30b5e4, 0x3f0bc545, 0x3f31cfdb, 0x38f86000, 0x3f233ba6, 0x3e99eb18,
0x3f7d6c3d, 0x3f09b22c, 0x3f3f2ec4, 0x3f124e24, 0x3f128af7, 0x3f2a6d40,
0x3ea586a2, 0x3edb681a, 0x3ece5504, 0x3f029603, 0x3f1a9d46, 0x3f383803,
0x3f5cc771, 0x3a27c000, 0x3db04f68, 0x3dce6d18, 0x3f1d2042, 0x3f22fbbd,
0x3dc10ea8, 0x3f4feda6, 0x3f5c7f71, 0x3f37a3c7, 0x3f21ee7c, 0x3c7d6ac0,
0x3f4d839b, 0x3e28c33c, 0x3d603190, 0x3d50af60, 0x3f454931, 0x3e641980,
0x3f053d19, 0x3f1e6400, 0x3bbaa300, 0x3ef85e94, 0x3e7c3734, 0x3ec13450,
0x3efbb828, 0x3f539b80, 0x3f3c920b, 0x3f40db11, 0x3caa9ae0, 0x3f7476a0,
0x3f31270e, 0x3f34d889, 0x3f265668, 0x3f38238e, 0x3f1e0f3f, 0x3f2a804e,
0x3f5a0e18, 0x3f7922fc, 0x3f556142, 0x3f3c373b, 0x3ec56d56, 0x3f2ebf49,
0x3f1a6b43, 0x3f7b678f, 0x3f449ef0, 0x3e28fcf0, 0x3db670a8, 0x3f16c306,
0x3e215480, 0x3d2edb40, 0x3eb5862c, 0x3d7800d0, 0x3defac10, 0x3f49bef3,
0x3b4d4900, 0x3f0326ec, 0x3e93b188, 0x3b953080, 0x3f24829a, 0x3eee36de,
0x3f72a546, 0x3f24f435, 0x3f25ed29, 0x3f644f16, 0x3f601ce4, 0x3e00230c,
0x3f007043, 0x3ecd65fc, 0x3f50572a, 0x3e6333d0, 0x3f6d09ee, 0x3f710c46,
0x3d63f3a0, 0x3e77e610, 0x3f55738f, 0x3ddadde8, 0x3f57d809, 0x3dfcfbb0,
0x3eb99b38, 0x3f7b469c, 0x3f0088bb, 0x3f105d58, 0x3dc54c08, 0x3f7c7ccf,
0x3f311603, 0x3e4cb7c4, 0x3f7ff53c, 0x3eceb33c, 0x3f6a67b8, 0x3f070edd,
0x3ef7f39e, 0x3d19c230, 0x3e107544, 0x3f28a38a, 0x3f7a0343, 0x3f3ab755,
0x3f545ce0, 0x3d30e140, 0x3f351b67, 0x3ebd2506, 0x3f063351, 0x3f038cb3,
0x3ed42b4c, 0x3e693418, 0x3f6da0cd, 0x3f632c9f, 0x3ca3a020, 0x3f367f4a,
0x3f10126c, 0x3f0d7629, 0x3f07fe5f, 0x3cd47e20, 0x3e0dd1c0, 0x3f5738ba,
0x3f77f75c, 0x3f56639f, 0x3f4d4f63, 0x3bee1000, 0x3d6a91e0, 0x3ce679a0,
0x3d653f50, 0x3ef009b2, 0x3f707b7c, 0x3f6e3f8d, 0x3f149e22, 0x3f48d35b,
0x3f0cbaf2, 0x3ddbda80, 0x3de6fb88, 0x3e90e962, 0x3f6943e1, 0x3ea97734,
0x3dbe7628, 0x3f5ef3c4, 0x3f533603, 0x3f1f5e27, 0x3f521baf, 0x3d4c61a0,
0x3e270004, 0x3eed11dc, 0x3f3b55fa, 0x3f2ea580, 0x3e837f60, 0x3dcaf748,
0x3d024910, 0x3ea39b1a, 0x3f4343ad, 0x3f1416ee, 0x3e909014, 0x3f0b7a44,
0x3f3ed8be, 0x3ef86f42, 0x3f403e99, 0x3e3c9860, 0x3f1e44f9, 0x3f639f3b,
0x3ecdc35e, 0x3f37b491, 0x3f4c9133, 0x3f0de154, 0x3e6d3a18, 0x3cdc73a0,
0x3f2e7908, 0x3ee000de, 0x3f28e79c, 0x3e30c8ac, 0x3ee7840a, 0x3f251eb0,
0x3f0e7d49, 0x3f326641, 0x3f470952, 0x3f6bf969, 0x3f25092a, 0x3f33a95c,
0x3f29c1b4, 0x3f358259, 0x3d3e86c0, 0x3f13c2da, 0x3ea2418c, 0x3d20d3d0,
0x3da3bc68, 0x3e2e98bc, 0x3af5be00, 0x3f5c74db, 0x3f2ddc69, 0x3f1c4ebc,
0x3ea536e0, 0x3f731956, 0x3f440397, 0x3f5a6bd7, 0x3edb8d4c, 0x3e825f1e,
0x3eb9e452, 0x3f54d22a, 0x3f7f766e, 0x3f367c5a, 0x3f75ca05, 0x3f13ff6e,
0x3e0c2060, 0x3ef3d12c, 0x3eb39a14, 0x3f2d35c9, 0x3db6a9e0, 0x3e9ea3f2,
0x3f367a3f, 0x3f5277d1, 0x3f62678d, 0x3f482f61, 0x3f6c0a9e, 0x3f68105b,
0x3f48a7c4, 0x3eb94e86, 0x3ee41e9a, 0x3e399568, 0x3f56523b, 0x3d27da80,
0x3ea3688e, 0x3ec94926, 0x3f40360d, 0x3f6095a9, 0x3d560080, 0x3f42e8a0,
0x3e221114, 0x3f357b2e, 0x3eeb2fa6, 0x3ec8f7de, 0x3f1e14bf, 0x3e9fb55a,
0x3e75799c, 0x3e043474, 0x3f3e3f41, 0x3f62ec9f, 0x3edf39a2, 0x3f240071,
0x3d0aa940, 0x3ec205ec, 0x3e90d806, 0x3eeedc50, 0x3f3c2177, 0x3f0cd18e,
0x3e32bd0c, 0x3e2b5510, 0x3db62460, 0x3f3b50df, 0x3f5c8656, 0x3d0cb440,
0x3eb97b82, 0x3eea0ba8, 0x3f445c08, 0x3f5e8508, 0x3e6ce20c, 0x3e8652b2,
0x3f151cc9, 0x3e9ae2da, 0x3e948fe6, 0x3f1851b1, 0x3e5ccc3c, 0x3f0565e0,
0x3e8522b4, 0x3eab573c, 0x3f5f7f0a, 0x3ece5d48, 0x3e0bc998, 0x3f03ed0c,
0x3db93a08, 0x3ec80d58, 0x3f114b7d, 0x3f343551, 0x3f5ce96e, 0x3e7cfab0,
0x3f40a8a8, 0x3f6285cf, 0x3e97a60c, 0x3ae6bc00, 0x3da64fa0, 0x3f492f69,
0x3f665170, 0x3f1f7ba1, 0x3f7b2b34, 0x3ee5bb02, 0x3e413d78, 0x3e854a76,
0x3f061166, 0x3c8bff80, 0x3f7a459f, 0x3ecbe174, 0x3f735006, 0x3e7056a8,
0x3edb975c, 0x3e1d7c10, 0x3e768ddc, 0x3defc8e8, 0x3f41f65d, 0x3f37fbbf,
0x3e8ec32a, 0x3eec0fa6, 0x3f38ed2b, 0x3ef3e1ea, 0x3f57dc16, 0x3ee48668,
0x3f29bd4c, 0x3f2a4ba5, 0x3f24b7df, 0x3f1dc4d0, 0x3f6e480a, 0x3f27fd8d,
0x3f0a3846, 0x3f4fa834, 0x3e458a54, 0x3c651500, 0x3d8294e0, 0x3f4f79b4,
0x3e8af340, 0x3f750540, 0x3f10410d, 0x3f68d688, 0x3ec04f0e, 0x3e9b4964,
0x3f57d1aa, 0x3e269844, 0x3d7dd470, 0x3f487e77, 0x3eea6ab6, 0x3f246078,
0x3f4b6754, 0x3f78f291, 0x3f09c52d, 0x3f7e4924, 0x3f7665f0, 0x3f6613b2,
0x3f5e8ecc, 0x3f55f7f1, 0x3d8810e0, 0x3e91f6ba, 0x3eb7a8a6, 0x3f145771,
0x3f584d56, 0x3dd12d38, 0x3f6d7552, 0x3e151aec, 0x3eeedc04, 0x3f6f1fc5,
0x3e503050, 0x3db59340, 0x3f403f9d, 0x3f4e20eb, 0x3f489973, 0x3f3799d7,
0x3f75306d, 0x3de0a890, 0x3f65653d, 0x3f78506c, 0x3f42ad47, 0x3f5dfae5,
0x3efaef42, 0x3f027d85, 0x3f18d19a, 0x3e55943c, 0x3e31d730, 0x3f63ae74,
0x3d23fa00, 0x3ec87ec4, 0x3e930264, 0x3e53a034, 0x3d868598, 0x3f4cfbde,
0x3f512309, 0x3c3be0c0, 0x3f41660f, 0x3eed1d32, 0x3e9f5226, 0x3f22bddc,
0x3e36b2ec, 0x3eb74a74, 0x3f385fae, 0x3f6cd6fe, 0x3f28d8f3, 0x3f4034b6,
0x3e87c4c8, 0x3f502e00, 0x3f463b41, 0x3ed869ca, 0x3f79ce5e, 0x3f4e5bd5,
0x3f18efff, 0x3eedb4ee, 0x3ed8c8da, 0x3cacda00, 0x3f39b71e, 0x3f08a753,
0x3f5adc83, 0x3dced118, 0x3f7f76b3, 0x3e961664, 0x3f4a107e, 0x3f15cb58,
0x3f1482e9, 0x3e2fa464, 0x3e396c64, 0x3e5afe2c, 0x3f27651b, 0x3e9dd2f2,
0x3d9471f8, 0x3f2fc954, 0x3d5d6120, 0x3f60812e, 0x3ebeee1e, 0x3ef74d5e,
0x3e2360f8, 0x3f280e5d, 0x3f223bf5, 0x3e098208, 0x3ed5c05e, 0x3f419257,
0x3f2f7988, 0x3e90e382, 0x3f73aa27, 0x3f5f8ca1, 0x3f6baa17, 0x3eebf75a,
0x3eb9d238, 0x3db97c70, 0x3f13beb0, 0x3f2fd1dc, 0x3f1b43ba, 0x3eadb87c,
0x3f22f4f3, 0x3f365e2c, 0x3eb67e5a, 0x3ee924b4, 0x3f19935e, 0x3f1fa90a,
0x3f6f9c0d, 0x3e6e8b98, 0x3d969cb0, 0x3ebfbd52, 0x3f4ae7d3, 0x3ef7b4bc,
0x3e32575c, 0x3d7e1040, 0x3f55f21b, 0x3f46b501, 0x3f5c4488, 0x3f0ad460,
0x3d8bba38, 0x3f755ec6, 0x3c0772c0, 0x3e5e1668, 0x3f24868d, 0x3ec90faa,
0x3eaa22fc, 0x3f337691, 0x3e967858, 0x3e9d1b90, 0x3ee62a4c, 0x3efd76d0,
0x3eec2bfe, 0x3ed491ba, 0x3f470aa6, 0x3f70f153, 0x3e8ec54c, 0x3ec2486c,
0x3ea9d4e6, 0x3f12c9bd, 0x3e5e73c4, 0x3e9d0336, 0x3e48e408, 0x3f6ba35e,
0x3f7ce98c, 0x3f67f738, 0x3e30e000, 0x3f0cfef9, 0x3e800654, 0x3e24da18,
0x3e4dde08, 0x3d63f810, 0x3e7dda24, 0x3f67f663, 0x3f682c33, 0x3df33cc8,
0x3f4a26e4, 0x3ecd1220, 0x3f3d2216, 0x3ddce4b0, 0x3f65e844, 0x3ece340a,
0x3f31a06d, 0x3ea55984, 0x3e92de8a, 0x3f152ad9, 0x3e714c98, 0x3f0982b7,
0x3f75ba4d, 0x3c6a9380, 0x3ec5acfa, 0x3f038cd8, 0x3f6bfac5, 0x3f542edf,
0x3c99a2e0, 0x3f6c8d6a, 0x3ec26f2e, 0x3e7785b0, 0x3ebddc76, 0x3f236c2f,
0x3f5d7b7e, 0x3ce05620, 0x3f457b46, 0x3f675754, 0x3ebf4f7e, 0x3efbcbdc,
0x3eae71ce, 0x3e990030, 0x3f706f56, 0x3f5827a4, 0x3f28a1b5, 0x3cdc3360,
0x3ea5c976, 0x3f40fb45, 0x3f07fbd7, 0x3eef39ea, 0x3dbfa1f0, 0x3e871ab2,
0x3f0444f9, 0x3f73d779, 0x3f008f41, 0x3ee47730, 0x3e5115b8, 0x3f2159a1,
0x3f56c876, 0x3ebc632a, 0x3ee5cf6e, 0x3f4fba1e, 0x3eda8ce4, 0x3eae05b6,
0x3f41a8c9, 0x3f6a0608, 0x3f3cf8b7, 0x3eab4d18, 0x3eaf8b42, 0x3e467e40,
0x3e57039c, 0x3efac590, 0x3f5c5b9a, 0x3f487734, 0x3f40a058, 0x3f204690,
0x3f1e977e, 0x3d1b5210, 0x3f1e529f, 0x3e2a73dc, 0x3ec374d8, 0x3f5b594f,
0x3ef58e9e, 0x3d0b0450, 0x3f4f1793, 0x3db6d750, 0x3d9766e0, 0x3f0fbede,
0x3f2a200b, 0x3e890cc0, 0x3eaf877a, 0x3f557638, 0x3d8476a8, 0x3f5fdda2,
0x3effd012, 0x3efe6fea, 0x3ef36232, 0x3f5750dc, 0x3f7b98fd, 0x3f6e69bb,
0x3ec07e7c, 0x3f67d100, 0x3dab3f40, 0x3de3f790, 0x3f39b8ac, 0x3f032c45,
0x3f3b3a41, 0x3e92a656, 0x3f37ab47, 0x3ec76088, 0x3b168e00, 0x3f7ffc48,
0x3f6d40da, 0x3ed852e6, 0x3e78782c, 0x3f365fc1, 0x3db7e790, 0x3f2a4690,
0x3f44258b, 0x3f578106, 0x3ed8b2f6, 0x3f446045, 0x3e3ca5d0, 0x3f2a38e6,
0x3eabb6be, 0x3ec0e31c, 0x3c312e40, 0x3f4c44ac, 0x3f73eac4, 0x3f2e6733,
0x3f01bf30, 0x3f3774a5, 0x3f0af237, 0x3f59b555, 0x3f45a569, 0x3d17de70,
0x3f185d3a, 0x3eea4ab8, 0x3f284b11, 0x3e8ad5f4, 0x3f29c851, 0x3f34265c,
0x3e56612c, 0x3e9c3ba0, 0x3f4d67b6, 0x3f1b4671, 0x3f4876cf, 0x3f6b7e85,
0x3f6c9081, 0x3e3444d4, 0x3ec7a8d0, 0x3f6df398, 0x3f15ae99, 0x3f17e000,
0x3f439479, 0x3f7da41d, 0x3f062d00, 0x3db7a440, 0x3f5b43bf, 0x3f07b2d3,
0x3f3777c8, 0x3e6cffd0, 0x3f78570b, 0x3f5306f8, 0x3dc78520, 0x3ed4442e,
0x3f768b99, 0x3f0d2fd2, 0x3f019e24, 0x3f2061ec, 0x3f64958c, 0x3ddbafe0,
0x3ede4ff0, 0x3f2c735d, 0x3f5c2ae3, 0x3f05987a, 0x3f3a8226, 0x3f103c9e,
0x3f60991e, 0x3f17be78, 0x3f7d64cd, 0x3f7ecd70, 0x3f2a7cbb, 0x3f3392f4,
0x3ee1fb42, 0x3eeb50f4, 0x3f6ceafc, 0x3e95aaf4, 0x3f51d9dd, 0x3f30d26e,
0x3f672cec, 0x3ece85ca, 0x3f3a1108, 0x3eb9833c, 0x3ecdfc3c, 0x3f744326,
0x3e4c115c, 0x3e944106, 0x3f51c13f, 0x3e038480, 0x3e8662c4, 0x3e4daaa0,
0x3c34c500, 0x3f2c3bea, 0x3f74d7e6, 0x3f6bc555, 0x3eba5ba6, 0x3f70b2e2,
0x3f421331, 0x3eb55894, 0x3ee6db14, 0x3e09bf0c, 0x3dffdf50, 0x3ec6aca2,
0x3f2c1388, 0x3f2fb529, 0x3f66b2d9, 0x3f7278b9, 0x3f00df53, 0x3eb554fe,
0x3cd22800, 0x3f41eda7, 0x3e96add2, 0x3e073dd4, 0x3f41d61f, 0x3e7d0e3c,
0x3f3797a2, 0x3f2d6060, 0x3eaee812, 0x3f495f54, 0x3f17d3e9, 0x3e29a128,
0x3ec94d3c, 0x3e878340, 0x3f397a76, 0x3e802aa2, 0x3ef726c8, 0x3f1b7d12,
0x3f341766, 0x3e1ff270, 0x3e4b8150, 0x3f707c7e, 0x3f306ce9, 0x3ec96bd0,
0x3f743dd4, 0x3eba121c, 0x3e9de208, 0x3e293244, 0x3d3474b0, 0x3f17851b,
0x3f6750a5, 0x3f28eddf, 0x3f06e1bb, 0x3e34752c, 0x3e2f601c, 0x3c5a4cc0,
0x3f049716, 0x3f0dcb62, 0x3f0a8c27, 0x3eb62324, 0x3d822228, 0x3ead06f8,
0x3f3f0a3d, 0x3f7d1f3e, 0x3f067ad0, 0x3dc8bbc8, 0x3f7f414d, 0x3f41c1aa,
0x3d1592a0, 0x3f56773d, 0x3eddb3a4, 0x3f79aaa3, 0x3d3a7be0, 0x3e838bcc,
0x3f097530, 0x3e883aca, 0x3f28beee, 0x3f2f533a, 0x3f3ccde2, 0x3f7642ce,
0x3f296fd6, 0x3f5bf4e3, 0x3f7baace, 0x3e69e954, 0x3f5c01ae, 0x3e2c1cbc,
0x3f417069, 0x3e5c1d0c, 0x3eaa39ae, 0x3df9cf68, 0x3eb76c40, 0x3e593544,
0x3f2e3814, 0x3ea9971a, 0x3f466aee, 0x3eab6256, 0x3ed7a840, 0x3f5520fc,
0x3f55d7ab, 0x3f5edf92, 0x3f1f5934, 0x3f1339bf, 0x3f69261c, 0x3f6e3d77,
0x3f08ed87, 0x3e07a544, 0x3f50f963, 0x3e8cbac4, 0x3dbb0060, 0x3f38aca9,
0x3f29aac1, 0x3eb8da7a, 0x3f68687b, 0x3f4a6173, 0x3aa8ba00, 0x3f4410ec,
0x3c191e00, 0x3db66910, 0x3ee78460, 0x3e8250c2, 0x3eda77e6, 0x3e150e0c,
0x3e85435a, 0x3f3b6662, 0x3ecaca3c, 0x3f2623c9, 0x3edf3894, 0x3f182df0,
0x3f3a721e, 0x3f5bb953, 0x3e9b0806, 0x3f5c289b, 0x3e9737d4, 0x3ee818ea,
0x3f2c3e8c, 0x3f7085f2, 0x3eb91014, 0x3e14ff5c, 0x3f09fc37, 0x3e878f32,
0x3f36a5c5, 0x3ea6c388, 0x3e933240, 0x3de07df8, 0x3f441b49, 0x3f1b30d8,
0x3f33b738, 0x3e05692c, 0x3ec0c92a, 0x3cb27d20, 0x3f447bf3, 0x3f33eb6c,
0x3d9ec4a8, 0x3f438b95, 0x3f4acfa9, 0x3f181633, 0x3f254931, 0x3e1dcf7c,
0x3f2a58c5, 0x3e34e20c, 0x3f266b7a, 0x3f647e3b, 0x3f5a65fb, 0x3ee099ae,
0x3eb35820, 0x3eaab0e2, 0x3e8738dc, 0x3e395a90, 0x3e88a826, 0x3f2187fd,
0x3ed68c56, 0x3db58110, 0x3f486561, 0x3edd8560, 0x3f088cd6, 0x3f7be94f,
0x3eebcafc, 0x3f63d8d3, 0x3f0b7a92, 0x3f5020e1, 0x3f2ae997, 0x3dc740a8,
0x3f6c0217, 0x3ee646b4, 0x3f0510e1, 0x3f63518b, 0x3f292a1e, 0x3f70fc3c,
0x3f72a4a8, 0x3f1106c8, 0x3f316cb3, 0x3f4630ed, 0x3ea42778, 0x3ee7383a,
0x3f3f7ffe, 0x3f0779e9, 0x3e613330, 0x3f28fe48, 0x3f67f1b4, 0x3f1c7a16,
0x3e9c3b4a, 0x3f124dce, 0x3ee4e97e, 0x3f1636eb, 0x3df17868, 0x3d8c80c0,
0x3f25f56f, 0x3e8fd746, 0x3f61a234, 0x3f0965ee, 0x3e868be4, 0x3d768430,
0x3f2c07b0, 0x3f7d6acc, 0x3f52edc2, 0x3f6fbf5d, 0x3ca6e3e0, 0x3e79da24,
0x3f620b27, 0x3d3edd10, 0x3f482759, 0x3f0491a1, 0x3f0f50ba, 0x3eed244c,
0x3f0a1668, 0x3ede80d4, 0x3edc13d4, 0x3f31beaf, 0x3de467e0, 0x3f185740,
0x3ceb64c0, 0x3e55df5c, 0x3f0ce9de, 0x3eed511c, 0x3f5b1690, 0x3f72794f,
0x3f336dab, 0x3e238744, 0x3f5316b3, 0x3f655fde, 0x3e292c48, 0x3ed5a3d8,
0x3dd0dfd0, 0x3f44c4e0, 0x3f16495c, 0x3b92b700, 0x3f21bf81, 0x3f583be9,
0x3f109eb0, 0x3f5df0b1, 0x3f467ca9, 0x3eae953e, 0x3f71a80f, 0x3da50470,
0x3f47aa45, 0x3ec203d4, 0x3f726be5, 0x3ea4fdee, 0x3da21d58, 0x3f294699,
0x3d8ae288, 0x3ed194aa, 0x3f47ab15, 0x3f247150, 0x3ed1772c, 0x3f544a52,
0x3cd05740, 0x3f315a73, 0x3e1e1b2c, 0x3f32bb5f, 0x3f0f36c4, 0x3d7816a0,
0x3f4cd169, 0x3e9cd702, 0x3ecafbe6, 0x3d3fda60, 0x3f6f5538, 0x3e866d56,
0x3e31c90c, 0x3ef28564, 0x3f382084, 0x3f01b0c1, 0x3f5a0649, 0x3f7d7fad,
0x3e6503c0, 0x3f6d0c17, 0x3f4b17fc, 0x3f764541, 0x3f41fcd5, 0x3f05afb0,
0x3e51a34c, 0x3f2a0c40, 0x3f5cb1e6, 0x3ed4cbda, 0x3f10db2b, 0x3ed5ed12,
0x3eb950a4, 0x3f7d710e, 0x3e72b1ec, 0x3c2da780, 0x3d1e3950, 0x3f651ef4,
0x3f3e816b, 0x3ef423b6, 0x3e98b0d8, 0x3d36b290, 0x3ead6a7e, 0x3f6f7e1b,
0x3f4012b1, 0x3ea35be8, 0x3f4544ce, 0x3f1377ff, 0x3f721bb8, 0x3f2cf517,
0x3dfd3388, 0x3f7094cd, 0x3f1bf9b5, 0x3eaebfe8, 0x3f0a8a99, 0x3dd93808,
0x3f0a5181, 0x3e3e3494, 0x3e77e78c, 0x3e2ae294, 0x3ec64f1e, 0x3f7a17b6,
0x3db13438, 0x3f3bba34, 0x3df86fc0, 0x3f67f92a, 0x3ea7af42, 0x3d41f930,
0x3d9a9040, 0x3f6c3d62, 0x3ef11d6e, 0x3e5eb804, 0x3f3dcc72, 0x3b6d7a00,
0x3ebe9c3e, 0x3e5e8b30, 0x3f6cc3e7, 0x3e0ecaf8, 0x3f5085d8, 0x3f136306,
0x3eea04f8, 0x3ecc6b04, 0x3e9aa354, 0x3f56f005, 0x3ee9a98a, 0x3eb50038,
0x3f3fdd0e, 0x3f739072, 0x3edc6df0, 0x3f61328e, 0x3f257d3e, 0x3d0ec5c0,
0x3f3790ce, 0x3eeb52c2, 0x3f306a5f, 0x3f2bd889, 0x3f5e42c6, 0x3f637ddd,
0x3f1fed94, 0x3d7b5f70, 0x3da97158, 0x3f7fd7d5, 0x3f55b2b7, 0x3f0f845f,
0x3f61a892, 0x3e641338, 0x3e986a90, 0x3f033e43, 0x3f42648d, 0x3e6d3d40,
0x3ee2adde, 0x3cef7220, 0x3f1226fb, 0x3f60081e, 0x3f5767db, 0x3f2cd931,
0x3ee94946, 0x3ece2fd2, 0x3e8b272c, 0x3e505a5c, 0x3f2e950c, 0x3f5d3165,
0x3f7bb32c, 0x3f46cd2c, 0x3f641a27, 0x3dbe0a88, 0x3f231482, 0x3ee4dcde,
0x3f08c3e8, 0x3eddcd8e, 0x3edbbf50, 0x3f0e508a, 0x3c029500, 0x3f5e7667,
0x3f572967, 0x3ed03836, 0x3f364022, 0x3e2b88bc, 0x3ecdb87c, 0x3f4f4864,
0x3e806d3e, 0x3f0a7597, 0x3ed0a246, 0x3ef8c7fa, 0x3eb39dec, 0x3f15bcab,
0x3f521cc1, 0x3eeb58c6, 0x3ee273e8, 0x3ee64b6e, 0x3f1cb300, 0x3e70507c,
0x3f4c7ada, 0x3ed285de, 0x3f74d3e4, 0x3f13b2b0, 0x3eb69026, 0x3df0a208,
0x3f64acf9, 0x3eaee482, 0x3dd0ada0, 0x3ec1330c, 0x3d33aa30, 0x3efc51f4,
0x3e506660, 0x3f0fcc47, 0x3e1bc37c, 0x3eedba8c, 0x3f6b1b56, 0x3ebbb730,
0x3cbca4c0, 0x3ef07072, 0x3ea0d5fc, 0x3d3c8150, 0x3e910c26, 0x3d4b8220,
0x3f16b029, 0x3edb4bee, 0x3f1856bb, 0x3e941346, 0x3cce5f20, 0x3d0297d0,
0x3edf75be, 0x3ebf3696, 0x3eb4db48, 0x3f3ac8eb, 0x3e958cf6, 0x3e3b0190,
0x3f7924b8, 0x3f2158b9, 0x3f283a31, 0x3f6f451b, 0x3f1ad181, 0x3f02752a,
0x3eaa7496, 0x3e9e61ce, 0x3e89e424, 0x3f706390, 0x3ea8fc3c, 0x3e8c4196,
0x3e8fd3e0, 0x3ef14214, 0x3f2f7db5, 0x3f183726, 0x3a17dc00, 0x3da3a6d8,
0x3f13fa3b, 0x3ed8d010, 0x3d09d8d0, 0x3ece6948, 0x3f2a96ed, 0x3ed3d1e2,
0x3dcdf918, 0x3f5a2b0e, 0x3f04d936, 0x3f61fd07, 0x3f3157e3, 0x3e8f2bbe,
0x3db68a58, 0x3f35a7dd, 0x3f43e0f6, 0x3f05c40e, 0x3f7727bc, 0x3ec23d50,
0x3f526af1, 0x3f35a25f, 0x3f1640bf, 0x3efb444a, 0x3bbe6600, 0x3f4d6d00,
0x3f1d3e2f, 0x3efe6e34, 0x3e8f4d72, 0x3f04a7c4, 0x3ecf7ee6, 0x3f4a3ac3,
0x3f1935b1, 0x3e3b9ccc, 0x3ea0885e, 0x3e42c47c, 0x3e9e71e6, 0x3f537d42,
0x3ed8f5e2, 0x3f0891a9, 0x3f1bdf8b, 0x3eb4e490, 0x3ea74d7e, 0x3f7fb442,
0x3f476d72, 0x3ddb9e80, 0x3e1eabac, 0x3d491a30, 0x3f02c419, 0x3f08d52d,
0x3e9f71de, 0x3f02cc29, 0x3de4c790, 0x3ee8028e, 0x3f775731, 0x3e6fc344,
0x3f26d772, 0x3f7d9af7, 0x3efcafaa, 0x3c8e0ae0, 0x3ee53c22, 0x3ee54618,
0x3f2ab56d, 0x3edb31fa, 0x3d963600, 0x3f13813a, 0x3f6d9550, 0x3f1473f4,
0x3f20891d, 0x3f7a432d, 0x3f1cab7e, 0x3eff8f70, 0x3f11b90f, 0x3ea4aace,
0x3e224f90, 0x3e41257c, 0x3e2da9e8, 0x3f2d4e43, 0x3e754fd8, 0x3f6609e9,
0x3d173320, 0x3f64929a, 0x3f46b6a9, 0x3e408dac, 0x3ee75e32, 0x3f433d57,
0x3e85f0f2, 0x3f300ef5, 0x3f4aaa91, 0x3efe4ba2, 0x3f41e04f, 0x3e0ca418,
0x3d95dc50, 0x3e9078ca, 0x3f675c91, 0x3eb8a6d0, 0x3f4baf39, 0x3f0c2056,
0x3d889150, 0x3bd57f80, 0x3e3a3bf4, 0x3e4e4df0, 0x3eab0ffe, 0x3f0c133c,
0x3ea79c34, 0x3e8cd78a, 0x3f05f9a1, 0x3f63d135, 0x3ee0af50, 0x3f27b67f,
0x3ee4e7f8, 0x3f3c424d, 0x3f76cc28, 0x3bdfcd80, 0x3da7c8d8, 0x3e2aa0cc,
0x3f12e4cb, 0x3f7b9b09, 0x3ec001c6, 0x3f5a9924, 0x3e39f428, 0x3f7dea38,
0x3e77cae4, 0x3dc868a8, 0x3f2db1a3, 0x3ec09fc8, 0x3ebc5a10, 0x3ea7699c,
0x3e3691a4, 0x3e8ce004, 0x3f456d22, 0x3e617344, 0x3f4c46f1, 0x3f302bad,
0x3ebc5b8e, 0x3f2d88ef, 0x3f046b5b, 0x3c6da840, 0x3f07e6d8, 0x3e815280,
0x3f4aab7d, 0x3ec04340, 0x3ea633c6, 0x3cd27500, 0x3f43a1fe, 0x3f2b887c,
0x3f432174, 0x3d973220, 0x3f216c73, 0x3ea7b2da, 0x3da974a0, 0x3ea73f28,
0x3f7726e0, 0x3ea9e0f8, 0x3f22ee20, 0x3f5295a8, 0x3f1a30ec, 0x3e713490,
0x3f3b1cdb, 0x3f28c7a8, 0x3de7f4c0, 0x3f1962d3, 0x3dc47678, 0x3d580600,
0x3f5ba907, 0x3c5ec180, 0x3ea29cf6, 0x3e0a047c, 0x3eb521bc, 0x3f2d12e4,
0x3f0211b7, 0x3ea0e182, 0x3da1c048, 0x3f6bf772, 0x3f687cb3, 0x3f118e12,
0x3e447350, 0x3f4b3135, 0x3d9e52d8, 0x3f68de56, 0x3f055759, 0x3f2783b2,
0x3f276191, 0x3f3cfd76, 0x3f5e2fad, 0x3f353203, 0x3e00a790, 0x3e66de8c,
0x3f6f604a, 0x3e9292ce, 0x3f59fb9a, 0x3f7d4d6c, 0x3bf91c00, 0x3f1b3f19,
0x3ea647b8, 0x3f771c34, 0x3e231834, 0x3f554200, 0x3eb29dfc, 0x3e974ebc,
0x3ef8058c, 0x3f14963e, 0x3c889480, 0x3f122b20, 0x3e9b9d12, 0x3f2399df,
0x3d616380, 0x3f3d3cea, 0x3dc06950, 0x3f587785, 0x3d12b6e0, 0x3f7781fe,
0x3f62c99c, 0x3ee9ee7e, 0x3f3cbefc, 0x3e5c43a0, 0x3f460b5f, 0x3da5f5d8,
0x3dc047f8, 0x3e9ebaee, 0x3f6aa518, 0x3eded60e, 0x3f0290a3, 0x3f3117fd,
0x3e8dcd20, 0x3f6eeccc, 0x3ed9a660, 0x3e752314, 0x3ed8b392, 0x3e35c83c,
0x3f01303d, 0x3d0433a0, 0x3ea7ca66, 0x3ed2f11e, 0x3f62dd89, 0x3f2736d8,
0x3e85ce88, 0x3f2cddad, 0x3dc79ee0, 0x3dd59368, 0x3cd7e200, 0x3ec8262c,
0x3f0088c5, 0x3e8f9eac, 0x3ef69be0, 0x3f5703d8, 0x3f28dc67, 0x3f343f42,
0x3e694760, 0x3eaf07d0, 0x3f4701f8, 0x3e9addd4, 0x3f5ecb0d, 0x3e01e05c,
0x3f7c0eac, 0x3f61b64c, 0x3f3dbde3, 0x3f276e4b, 0x3f1b9fe1, 0x3f5a00a1,
0x3e56a4d8, 0x3d355100, 0x3e97513e, 0x3f7563c0, 0x3f44a370, 0x3f484d75,
0x3f14a3f0, 0x3edd9a86, 0x3e97c540, 0x3f2b38fa, 0x3ea52450, 0x3e1ffda8,
0x3cb4f400, 0x3ea919ac, 0x3f16e4ac, 0x3f06c521, 0x3cef2c20, 0x3e16a5d0,
0x3f121e6e, 0x3f5dd2ff, 0x3f3e0a5a, 0x3f7f21a2, 0x3f77966f, 0x3f45f1c8,
0x3f54eecc, 0x3df838c0, 0x3f68cc88, 0x3bad1000, 0x3b71b500, 0x3ddc5348,
0x3e3f66dc, 0x3ecdd10a, 0x3f1b1411, 0x3f768d3e, 0x3e9790d6, 0x3f31fdaf,
0x3ecd4b82, 0x3e99dda8, 0x3f4fe66f, 0x3d883970, 0x3f73ffad, 0x3e817a76,
0x3ef645de, 0x3f5f42fd, 0x3f685a96, 0x3f7ee0b8, 0x3e857726, 0x3d54d6d0,
0x3ef6b218, 0x3f64870a, 0x3efa433a, 0x3f0a2d42, 0x3ecd5ef4, 0x3f27b5b8,
0x3f18d599, 0x3f02a468, 0x3ec6ae00, 0x3e45c4e0, 0x3e796a30, 0x3e94739e,
0x3f675640, 0x3f3364bc, 0x3f4c8dee, 0x3ea8903e, 0x3f1a1c09, 0x3e2e204c,
0x3eb4b28a, 0x3f7fe159, 0x3f0cca06, 0x3e791940, 0x3ea3d052, 0x3f26184e,
0x3f2a704c, 0x3d9060d0, 0x3f7a80cf, 0x3f4cc0c6, 0x3f2fc910, 0x3f3b2d26,
0x3db69cf0, 0x3f026699, 0x3f454095, 0x3f6df60a, 0x3f7cd023, 0x3f677425,
0x3e504aa0, 0x3f74c4de, 0x3f1d1252, 0x3f4d6716, 0x3dfeca28, 0x3f7d2b23,
0x3e0efe44, 0x3ebd1232, 0x3d23bc30, 0x3f5d98b6, 0x3f53cbe5, 0x3f2c4d0b,
0x3f1d8d9c, 0x3e04e914, 0x3f4eac75, 0x3f7b688e, 0x3ebf9742, 0x3eb606cc,
0x3f15e1f4, 0x3f715d1b, 0x3ea678e6, 0x3d730a30, 0x3adc4600, 0x3ed41538,
0x3f7d191c, 0x3f7944c0, 0x3ed2342e, 0x3f35d0b4, 0x3f37a5c5, 0x3f60a0e3,
0x3f2d200b, 0x3f4b7f21, 0x3f39493a, 0x3f0a7a43, 0x3e298768, 0x3de3f950,
0x3ee894ba, 0x3f603702, 0x3f23688b, 0x3f4d266a, 0x3f4b505d, 0x3f5bf4df,
0x3e99ec82, 0x3eea0b92, 0x3eba586c, 0x3f241f30, 0x3ed36a62, 0x3f3b0ead,
0x3f0c6c68, 0x3e1ca338, 0x3ef19a80, 0x3f0391b3, 0x3e4ae480, 0x3f312cd1,
0x3e882b00, 0x3d6cdbd0, 0x3df20720, 0x3f552ce6, 0x3d8457d8, 0x3f279a29,
0x3f14306c, 0x3f352f27, 0x3e948830, 0x3e9876d4, 0x3f1b89ae, 0x3dadca70,
0x3ec64564, 0x3eb84b2e, 0x3ebfbf88, 0x3df3d1d0, 0x3c94a980, 0x3f332e28,
0x3e358be8, 0x3f7d27eb, 0x3f00cc29, 0x3e110758, 0x3f4659a3, 0x3ef088e4,
0x3f07299b, 0x3f0e49b4, 0x3ebf8244, 0x3f5bdcad, 0x3f30ee6b, 0x3f01aba3,
0x3f3168b8, 0x3f5304c5, 0x3f46c8c2, 0x3f02b22d, 0x3d9b1968, 0x3e89197c,
0x3ed0d7f8, 0x3f084022, 0x3ef2b110, 0x3dbf97b0, 0x3f42eb04, 0x3f5c85a5,
0x3f02584f, 0x3ec52bbe, 0x3e3eee8c, 0x3f2c42e5, 0x3dd8dbd8, 0x3ed53dbe,
0x3b89b580, 0x3e9d8f04, 0x3f231fea, 0x3ef7aef2, 0x3f0cfdea, 0x3f010a37,
0x3ea89d68, 0x3d95ab20, 0x3eaba508, 0x3e504740, 0x3eb6260a, 0x3d955910,
0x3b191000, 0x3c63b380, 0x3f5b70c3, 0x3f702761, 0x3f5597a9, 0x3f624aee,
0x3f7f215a, 0x3ecb9636, 0x3ef8b348, 0x3f4a5694, 0x3d9e2ac8, 0x3d1d91f0,
0x3f6d324d, 0x3d8694d8, 0x3f6ed642, 0x3f6c61fe, 0x3eaf6eb2, 0x3f4f39d3,
0x3d8870c8, 0x39c60000, 0x3f6eeb8c, 0x3ed64f4e, 0x3ea5316c, 0x3f381d93,
0x3dec5f48, 0x3f3fcd55, 0x3f7e3b3f, 0x3e437988, 0x3ee9f06a, 0x3f137d39,
0x3d9b9cb8, 0x3e8f6fd4, 0x3f0db959, 0x3d9dc550, 0x3f0cf803, 0x3c824ca0,
0x3f49b93c, 0x3f0024f8, 0x3f61567c, 0x3f0ef5bd, 0x3e3fb8b0, 0x3e89f82e,
0x3f1acbc7, 0x3ebe55ca, 0x3efcee30, 0x3e192cd8, 0x3f116e1b, 0x3eef0498,
0x3d1bd930, 0x3dfcf888, 0x3f29be21, 0x3f4c5b60, 0x3f05cd59, 0x3e04c3b8,
0x3eb0b3cc, 0x3eb1c690, 0x3e884f5e, 0x3d3f6610, 0x3e7c24b0, 0x3ead28a0,
0x3f602641, 0x3e373220, 0x3dd1cd80, 0x3e1397c4, 0x3f1e61b1, 0x3ec616c2,
0x3efd70d6, 0x3f17132d, 0x3dee0960, 0x3f129730, 0x3f5e06a9, 0x3eb00f6e,
0x3d459be0, 0x3f783b0d, 0x3cc6fc60, 0x3f1f52b1, 0x3f41e3d2, 0x3f590e1e,
0x3f2c68a8, 0x3ec32c28, 0x3ec5a670, 0x3ede47be, 0x3f2943ca, 0x3f75085b,
0x3f7f4144, 0x3f123e70, 0x3d8f9718, 0x3ecab2aa, 0x3e8f3f76, 0x3e8d5bf0,
0x3f74b88f, 0x3f490d95, 0x3f637989, 0x3e7c7200, 0x3e0d5968, 0x3f4218e6,
0x3f0b3015, 0x3f37ed57, 0x3da88578, 0x3eb5f42c, 0x3f4c8054, 0x3f080983,
0x3eb9a226, 0x3f0af081, 0x3f61c384, 0x3f109a3f, 0x3f13bee0, 0x3f5db356,
0x3f0d5e9c, 0x3ea82f4c, 0x3f1bcd4b, 0x3d51b510, 0x3f09f00c, 0x3d82ec08,
0x3f5e6ee1, 0x3ecde5dc, 0x3e0f95a0, 0x3ed39f80, 0x3f413551, 0x3e812c1c,
0x3f37428a, 0x3edcc670, 0x3f6d241a, 0x3ed78a4a, 0x3e8a14ba, 0x3eccb736,
0x3daea0c0, 0x3f7fca4b, 0x3f2a8309, 0x3f02ce7c, 0x3b694900, 0x3f7aee80,
0x3f437099, 0x3f121298, 0x3eeb6eb2, 0x3f5f222d, 0x3e207f14, 0x3f654eeb,
0x3f5354b5, 0x3f53c7f7, 0x3f65607b, 0x3da58fb0, 0x3d020c20, 0x3ebab812,
0x3d99f038, 0x3f48fb5d, 0x3da33a48, 0x3f2c382f, 0x3f4faac1, 0x3e44648c,
0x3e17d650, 0x3f69759f, 0x3f5c2d9c, 0x3f2f7408, 0x3f4deeb8, 0x3e80369c,
0x3f578763, 0x3f161695, 0x3ee9a0b2, 0x3f2387f6, 0x3e8ab8c6, 0x3f0fbe20,
0x3f43d4d7, 0x3f0236dc, 0x3f03aeed, 0x3eb6a360, 0x3ec6ae26, 0x3f1cf6ba,
0x3d8155f0, 0x3eb5f598, 0x3f2ec944, 0x3f4a5829, 0x3eca81ce, 0x3e7cfa68,
0x3f2349be, 0x3f51cedc, 0x3e97243c, 0x3e15ae84, 0x3e97e128, 0x3f24110c,
0x3f4d4bce, 0x3f5f5de8, 0x3efc9c5e, 0x3e98d9ee, 0x3e20c9fc, 0x3ee26668,
0x3f19ec8f, 0x3f637941, 0x3f5c45a1, 0x3ef920aa, 0x3f284d25, 0x3f5f43b9,
0x3f444684, 0x3d65a5f0, 0x3f330075, 0x3eda58ca, 0x3ea54fec, 0x3e9e695a,
0x3f29556b, 0x3f0917b6, 0x3f3dde5d, 0x3bb33000, 0x3e32842c, 0x3e607d38,
0x3f24c664, 0x3e474a28, 0x3f6d56dd, 0x3f44add0, 0x3f17fe92, 0x3f13ac13,
0x3f4a9cde, 0x3e2eab70, 0x3e357750, 0x3f4ba011, 0x3e9934ee, 0x3f069a5d,
0x3ea8c8c0, 0x3d824180, 0x3ed1326a, 0x3d65a1a0, 0x3f194f22, 0x3f4847f3,
0x3f00b367, 0x3e9e3ed4, 0x3f6f9af8, 0x3f003f05, 0x3ef6506c, 0x3f7cf576,
0x3e98e6f6, 0x3e9a530c, 0x3ec3ac60, 0x3ca49e20, 0x3f43598f, 0x3eda0dd6,
0x3f770751, 0x3f6db49f, 0x3f002df0, 0x3f551e6c, 0x3ec5fa6c, 0x3e91dc8a,
0x3f3c2046, 0x3f0c31bb, 0x3eded452, 0x3d0fc900, 0x3f140fe7, 0x3f0272ac,
0x3f6cf8ef, 0x3f752bed, 0x3f71371d, 0x3f3e06fe, 0x3ec62fd6, 0x3da0cfd0,
0x3f2770a1, 0x3e4ec10c, 0x3f359ed9, 0x3f4a0f07, 0x3f52ab68, 0x3f5a19cb,
0x3ed9b99a, 0x3e243410, 0x3f5aabd3, 0x3f5da3d4, 0x3f04112d, 0x3f0c9229,
0x3f1190d6, 0x3f5d2301, 0x3d577600, 0x3f031693, 0x3f5bb257, 0x3e8fb77c,
0x3f035d2b, 0x3e592930, 0x3e982898, 0x3ebe9a04, 0x3f5fae2f, 0x3f0c312b,
0x3e05dc00, 0x3f18bbf3, 0x3dfa7188, 0x3f10b13c, 0x3f2799d5, 0x3e736dec,
0x3eaac872, 0x3f794288, 0x3f1c02af, 0x3edc8d40, 0x3eeb2854, 0x3ed88fc8,
0x3f60f296, 0x3f17ef3f, 0x3e9c10ea, 0x3cebcc60, 0x3f3b4496, 0x3e38213c,
0x3f1914ee, 0x3e145484, 0x3f251a26, 0x3f1e3292, 0x3e522ba0, 0x3f22835e,
0x3f22030e, 0x3e484284, 0x3f42a72c, 0x3e5380cc, 0x3f5a0420, 0x3f3e0d80,
0x3f0c6364, 0x3e6575dc, 0x3ebc5ca2, 0x3f5ebe2f, 0x3f6327c9, 0x3eb0dd20,
0x3f1b9df8, 0x3e24c074, 0x3f0f117f, 0x3e87cef2, 0x3f10ad68, 0x3f44e1f6,
0x3e1b9914, 0x3f44a79d, 0x3e069a74, 0x3ece147a, 0x3ee649b8, 0x3f5ac8a1,
0x3ecfca14, 0x3f128ed6, 0x3ee27a88, 0x3de5bc88, 0x3f1fae3b, 0x3d4c0ad0,
0x3ec8a086, 0x3ee8ee7e, 0x3f14b396, 0x3f75e742, 0x3f70ad2f, 0x3e34e8ac,
0x3ea88008, 0x3f23a599, 0x3f5b54ce, 0x3f4a42b6, 0x3f1e336d, 0x3f0bb247,
0x3e3c62d0, 0x3f4ad45d, 0x3f33b8a4, 0x3f701309, 0x3e7cba70, 0x3e6e9144,
0x3ee55078, 0x3f0b4a38, 0x3f2208b1, 0x3f4ab2aa, 0x3ec93514, 0x3f54cda5,
0x3ef9e268, 0x3f5318c3, 0x3f4271d0, 0x3e446518, 0x3c0cc540, 0x3f6ffca9,
0x3eac477e, 0x3edd9ad4, 0x3f64204f, 0x3f790415, 0x3ef92596, 0x3e605498,
0x3ec16dbc, 0x3f31715b, 0x3f0c059f, 0x3edd9362, 0x3d34d790, 0x3f0fb023,
0x3eb1630a, 0x3f1b5d28, 0x3f7b4b91, 0x3f75c52d, 0x3f1cd993, 0x3f083531,
0x3ed72c16, 0x3f011af4, 0x3e47e100, 0x3f11ed07, 0x3f105f4f, 0x3f6b70d1,
0x3dce81e0, 0x3f5d206c, 0x3f2b5a0f, 0x3f510dd5, 0x3f773089, 0x3dcc1078,
0x3f68548d, 0x3f7b910d, 0x3f0069c2, 0x3e9a6492, 0x3eb39030, 0x3f2c1836,
0x3e6e8a30, 0x3f67045a, 0x3f5ebf5d, 0x3f32486f, 0x3ef29262, 0x3e13a8a8,
0x3f42ab87, 0x3ebf0958, 0x3cbb6ce0, 0x3efe75b2, 0x3eff1664, 0x3d968ac8,
0x3d755380, 0x3eace430, 0x3f3e1fad, 0x3dcebc90, 0x3ea1f368, 0x3f71ad48,
0x3ef44b0a, 0x3f695d62, 0x3d8f45d0, 0x3f6325f6, 0x3ea517a6, 0x3f4b12ad,
0x3ee85e72, 0x3e8bb160, 0x3eb37644, 0x3e6946f0, 0x3e963a1c, 0x3bc5d680,
0x3d991c50, 0x3f0dba99, 0x3ead6efc, 0x3e2cbb1c, 0x3e93f2c8, 0x3f4c888f,
0x3f06a59e, 0x3e5d6af0, 0x3eaa509a, 0x3f66abce, 0x3f0eb11f, 0x3e106ef8,
0x3e01d60c, 0x3ebd6b4a, 0x3e83a364, 0x3d291040, 0x3f3147c6, 0x3f6c54ef,
0x3f2d46e2, 0x3ea063ee, 0x3dbd23d8, 0x3f2c3471, 0x3f198565, 0x3f14cc7a,
0x3f50197a, 0x3f4fe81f, 0x3d7f2de0, 0x3f6817dc, 0x3f66531d, 0x3eae3468,
0x3ed86a7a, 0x3e8ffc1e, 0x3ecbc786, 0x3ed739a6, 0x3edfb1c8, 0x3f0bed6b,
0x3f153681, 0x3e90d1ac, 0x3f688961, 0x3cdd5920, 0x3f2553a5, 0x3ed1ddc8,
0x3e7fbcac, 0x3f3641e1, 0x3ebb566c, 0x3dc216d8, 0x3db053e0, 0x3eff6246,
0x3f2e5590, 0x3ef1d916, 0x3e2611d0, 0x3ef35528, 0x3f5048ad, 0x3e206988,
0x3ce4f4e0, 0x3f0c57e6, 0x3eb6e9c6, 0x3ed75f84, 0x3f69ffdc, 0x3f3f286c,
0x3f7f7f91, 0x3e011f98, 0x3db3a9d8, 0x3f58e30d, 0x3f310050, 0x3e1d9ce0,
0x3ef19468, 0x3e8f5922, 0x3ef21cf8, 0x3f6651bd, 0x3e3668e8, 0x3f08cb81,
0x3f1a8bee, 0x3f40511c, 0x3f692993, 0x3e435dd8, 0x3ef67a0a, 0x3e0cefd8,
0x3f6c71b4, 0x3f3acd2b, 0x3c410100, 0x3ebc59c0, 0x3da99290, 0x3e6a7b8c,
0x3dafd3d0, 0x3d2a67f0, 0x3f450ccb, 0x3ddb5478, 0x3d39eb10, 0x3f0eafb7,
0x3e754dc0, 0x3e1c7c50, 0x3f5ee4b2, 0x3f1c7e7a, 0x3f60c0d1, 0x3f0b805c,
0x3f16f607, 0x3f25316c, 0x3f5c4ba5, 0x3f5dcab2, 0x3f3b84a1, 0x3d3da1b0,
0x3a912400, 0x3ddbe120, 0x3e504938, 0x3ea8786a, 0x3f60c1da, 0x3f22c4fd,
0x3f1d1d95, 0x3e711420, 0x3f0ef8af, 0x3f043f45, 0x3f5293d7, 0x3f291b7f,
0x3eea3d2c, 0x3e5e64bc, 0x3f4825cc, 0x3eec1d98, 0x3ed758d4, 0x3e89a952,
0x3cfde3c0, 0x3df27828, 0x3d927b30, 0x3d87e550, 0x3f3b4132, 0x3f0eacf2,
0x3ec04a88, 0x3e82f43c, 0x3f6252aa, 0x3f7cc3a5, 0x3e865bf2, 0x3e710808,
0x3e094d7c, 0x3f59cebc, 0x3eed3662, 0x3f1b9e85, 0x3dbe8080, 0x3d8b4470,
0x3f0c5d83, 0x3d23fb30, 0x3d7561c0, 0x3f781334, 0x3ed7dcaa, 0x3ed4da30,
0x3f5e52b1, 0x3f54d0af, 0x3ebe54a8, 0x3d85b380, 0x3e5194b4, 0x3f5493d8,
0x3f79d6f0, 0x3d887998, 0x3f03ace0, 0x3eb52c78, 0x3f7751ac, 0x3f04adea,
0x3d050890, 0x3ebaf404, 0x3e97a6d6, 0x3ebc7e76, 0x3dbf44a8, 0x3e158ffc,
0x3f2da489, 0x3eaa71a8, 0x3e980f0c, 0x3f2a1f1e, 0x3f7779af, 0x3f639c13,
0x3debd510, 0x3f513358, 0x3ee5acf8, 0x3ebf290e, 0x3ed4b634, 0x3ed78f0a,
0x3f2d7ee6, 0x3dc26f60, 0x3f2c05ce, 0x3e597350, 0x3f172c2e, 0x3d738630,
0x3f4674f7, 0x3e80a71a, 0x3f3a06f0, 0x3f68d353, 0x3f6f7572, 0x3f2a407b,
0x3cc1ebc0, 0x3bf95f00, 0x3c373940, 0x3f647d22, 0x3e898bd6, 0x3db94918,
0x3e0e7130, 0x3eb4baa6, 0x3eef9370, 0x3f7f4a1a, 0x3ea125b4, 0x3f5307a1,
0x3d539b00, 0x3f32ee21, 0x3f1fa938, 0x3f4cf527, 0x3f34f504, 0x3f19c8da,
0x3ee7ff62, 0x3f791b26, 0x3f674aad, 0x3f5433cb, 0x3ebf244c, 0x3f2564cc,
0x3f2ee888, 0x3f6ee3dc, 0x3f402302, 0x3e2db948, 0x3f10354d, 0x3d77e9a0,
0x3e086dfc, 0x3ebbe8a0, 0x3f5717f8, 0x3f098e91, 0x3f50a649, 0x3f21cf6e,
0x3eef57ee, 0x3e6f544c, 0x3f25f7b4, 0x3eafbafe, 0x3f062946, 0x3ee87e1e,
0x3ea6e1dc, 0x3f1c66a4, 0x3ee6fe92, 0x3ee91d80, 0x3f572123, 0x3ec88b50,
0x3f742018, 0x3f54e90f, 0x3e65086c, 0x3f0a6c6e, 0x3e8fd818, 0x3f244c65,
0x3d2a4a40, 0x3f72aaa5, 0x3e95a6de, 0x3f1ebabd, 0x3f340a85, 0x3d4c40f0,
0x3e63d1dc, 0x3f579876, 0x3f29635d, 0x3e5158d8, 0x3f0c63df, 0x3e11fe8c,
0x3efd9a12, 0x3efb66cc, 0x3eca6a44, 0x3ed66508, 0x3dec61c0, 0x3d35f550,
0x3de20030, 0x3db20850, 0x3f75d785, 0x3e89cfa0, 0x3ee36e38, 0x3ea5cf3a,
0x3f628708, 0x3e4a2ba4, 0x3e3ef4e0, 0x3ebe1fd4, 0x3f3204d2, 0x3f388c96,
0x3d266f80, 0x3f4e05f6, 0x3f570918, 0x3e3650e0, 0x3e44cef8, 0x3f461eba,
0x3ead68a2, 0x3d984998, 0x3f51f8aa, 0x3e748b64, 0x3f6ca2d9, 0x3f20e07b,
0x3e94f292, 0x3e08ffe8, 0x3f548a2d, 0x3e1a4b84, 0x3e60af70, 0x3e9aff5a,
0x3efda772, 0x3e3a124c, 0x3f645cca, 0x3e188670, 0x3f45c4e3, 0x3f0502e6,
0x3e865ef4, 0x3ebb3d6a, 0x3e5807b4, 0x3eef22f4, 0x3f0e776c, 0x3f325003,
0x3f5be481, 0x3f3bada9, 0x3f49fa0c, 0x3d996b70, 0x3e56903c, 0x3e8d8cb2,
0x3d8e7520, 0x3f58b77a, 0x3ba20200, 0x3cbe9c40, 0x3f60815f, 0x3cc5e9e0,
0x3e71f000, 0x3f62a7ea, 0x3e6d3bd8, 0x3f59cb91, 0x3ec5af5e, 0x3ecc41b2,
0x3e8ad474, 0x3f3b6b09, 0x3ed75dc2, 0x3e0f521c, 0x3ed84246, 0x3e9a1586,
0x3cb52520, 0x3f1d0fcd, 0x3f6bd064, 0x3f599cab, 0x3eb66bf6, 0x3f535c3a,
0x3b856700, 0x3ebca092, 0x3ecc6d48, 0x3f08dcc0, 0x3b897f00, 0x3f2ae8ed,
0x3f2d2a36, 0x3e464a74, 0x3dcb02e8, 0x3eda8a62, 0x3f4b9edc, 0x3f125394,
0x3f788ddb, 0x3e7b4104, 0x3d5c6a60, 0x3f6309c8, 0x3e8c04be, 0x3f3667b9,
0x3ed8dc34, 0x3ef846f6, 0x3f02654a, 0x3de06ea0, 0x3f4cb88b, 0x3d6d4bc0,
0x3e9cad44, 0x3f6ed175, 0x3f098e2a, 0x3f3d5201, 0x3f3ce099, 0x3e733bb0,
0x3f37f4c5, 0x3ea4e26e, 0x3f1f2323, 0x3f556ee5, 0x3f455e12, 0x3dba88b8,
0x3f4b6c7e, 0x3ed9a0a6, 0x3eb84ec4, 0x3f6619fc, 0x3f2dae60, 0x3ec23506,
0x3d520070, 0x3ef910c0, 0x3f2ab23a, 0x3d07a9b0, 0x3f29343c, 0x3e169d5c,
0x3e85c5e4, 0x3e8ccb44, 0x3f0d6acc, 0x3f0610ab, 0x3da08f18, 0x3ef4f6c2,
0x3f6c30d2, 0x3e864474, 0x3ebcc42a, 0x3edf7a74, 0x3ef1a0b4, 0x3e8aaf86,
0x3f38f8f8, 0x3f245c9e, 0x3f418e46, 0x3e041c3c, 0x3e960e66, 0x3f277689,
0x3d12b8f0, 0x3e127ed4, 0x3e0e877c, 0x3f576bb4, 0x3f60cb7a, 0x3d9b4cf0,
0x3f02649d, 0x3ef08648, 0x3e76ea30, 0x3ed2ec12, 0x3f3181de, 0x3e73a63c,
0x3df88ff8, 0x3ec23b34, 0x3f1fa52a, 0x3f569318, 0x3f033044, 0x3f589280,
0x3de7e730, 0x3f1c62a8, 0x3eb6dd84, 0x3e6c4f20, 0x3d413ab0, 0x3f5b45cf,
0x3e80d9a0, 0x3edf4900, 0x3ef9cf66, 0x3e8e76d8, 0x3d2590e0, 0x3f0dcfa7,
0x3f7f0b3d, 0x3e73b644, 0x3ea4492e, 0x3f302059, 0x3da70828, 0x3e8a3234,
0x3e58c8c8, 0x3eb2b674, 0x3f48fb98, 0x3f105af2, 0x3f598be0, 0x3d7be3d0,
0x3f57fb03, 0x3ec08bbe, 0x3f788d7f, 0x3f495be3, 0x3f501880, 0x3f594dd8,
0x3d3d6b10, 0x3f367bd7, 0x3f6d9f4a, 0x3f369aae, 0x3d698ae0, 0x3f027daf,
0x3eee638a, 0x3f1d3820, 0x3f2134f1, 0x3f6675db, 0x3d8e8820, 0x3ed46a7a,
0x3f73c1e5, 0x3eb09022, 0x3e3b7b2c, 0x3e069588, 0x3f6a933d, 0x3ee4b4b4,
0x3ec684b4, 0x3f6451c7, 0x3ea6806c, 0x3f1cec2d, 0x3f339f15, 0x3ec154e4,
0x3ea7c544, 0x3f0b5d07, 0x3e8b78d0, 0x3f5f1241, 0x3e3d5bec, 0x3f44d002,
0x3e8ed2c2, 0x3f45e176, 0x3ece9b6a, 0x3e8bff6c, 0x3ee269a8, 0x3e379324,
0x3e0df360, 0x3e9ea76a, 0x3e806788, 0x3e401320, 0x3f4eea32, 0x3eefd2f2,
0x3f2911b5, 0x3d9bb480, 0x3f4c48e2, 0x3f7df2cf, 0x3f43a17f, 0x3f534ea2,
0x3e0ac4a8, 0x3c91cfe0, 0x3cd24a20, 0x3da674c0, 0x3ef5e8de, 0x3ec76640,
0x3e3a5a88, 0x3eaaa6d4, 0x3f059644, 0x3e0c21f4, 0x3e9584ce, 0x3f4419cc,
0x3f6f30ca, 0x3f4ee81e, 0x3f5c7c9a, 0x3d9d93a8, 0x3ea9e01c, 0x3f204655,
0x3f5f3f42, 0x3f507d1e, 0x3dd3d140, 0x3f1fac08, 0x3eb3896e, 0x3d98b210,
0x3e744904, 0x3f2701f6, 0x3ed3260e, 0x3f1f0b91, 0x3e423b14, 0x3f0ec2bc,
0x3f02b3c0, 0x3f2433a2, 0x3f462c82, 0x3d7a6790, 0x3e2dd630, 0x3eeac03a,
0x3ebb3756, 0x3e280664, 0x3ea68fea, 0x3f4e6127, 0x3f5a9e70, 0x3e359020,
0x3f6171f9, 0x3eb47ea4, 0x3e86c8c4, 0x3f24b254, 0x3f153db9, 0x3e8848a8,
0x3d5150b0, 0x3f0f86c9, 0x3f442c61, 0x3f2b5c06, 0x3f2a0203, 0x3f6ae7f9,
0x3d4ff760, 0x3d825e90, 0x3e1d82ac, 0x3c2d3900, 0x3e0bec74, 0x3f4b4ae0,
0x3f3ea747, 0x3d38e940, 0x3f7fb9d4, 0x3d2f52e0, 0x3f17bb21, 0x3f784439,
0x3f0619a3, 0x3df725c8, 0x3f72d11d, 0x3db65e90, 0x3f720730, 0x3f783c9c,
0x3eb51d22, 0x3f27b0cc, 0x3f05baec, 0x3e96f534, 0x3f461b79, 0x3f1e7da3,
0x3f418f68, 0x3edf861e, 0x3f14a742, 0x3d644f00, 0x3ef4cd4a, 0x3eaeb200,
0x3f3491c8, 0x3ed2cdaa, 0x3e7b3738, 0x3f40d64f, 0x3e8a2624, 0x3f3d2bb5,
0x3e23b094, 0x3f4fa2aa, 0x3f1dd2f1, 0x3edd400e, 0x3f7b44c9, 0x3e70bbb8,
0x3f492120, 0x3e298a98, 0x3dfa5b98, 0x3e9b0b02, 0x3e92b148, 0x3f6494f3,
0x3f4a54c1, 0x3e435654, 0x3ebd4994, 0x3f6f391c, 0x3eb764e0, 0x3f728eca,
0x3edbccd2, 0x3ec57116, 0x3f6654cc, 0x3f10e510, 0x3f062043, 0x3f693583,
0x3d8a1020, 0x3f4edcf2, 0x3f2559da, 0x3f10889d, 0x3eb765e0, 0x3f55386e,
0x3f35beee, 0x3f148d45, 0x3f5cbf2e, 0x3e607d30, 0x3ef5e90a, 0x3e9d6a82,
0x3f4c40d1, 0x3d8ce2a8, 0x3f5477a7, 0x3eb4a2c2, 0x3ec3bf78, 0x3e994b32,
0x3b59dc00, 0x3f5a9476, 0x3f7a4370, 0x3d9fc408, 0x3e5048fc, 0x3ea3ab18,
0x3d191bb0, 0x3e56a758, 0x3f206535, 0x3e4637c8, 0x3f0140c8, 0x3d54a3c0,
0x3ee116be, 0x3f63e06b, 0x3e08bfd8, 0x3f7f2a9d, 0x3f34bac9, 0x3f78b84a,
0x3d7c0d10, 0x3f2ed9da, 0x3d4240e0, 0x3e3212b8, 0x3c426440, 0x3e738ae4,
0x3e802148, 0x3f218f91, 0x3f4f649e, 0x3f46325e, 0x3f2cb56a, 0x3efd33e0,
0x3f63e5c0, 0x3f6a03c8, 0x3f67f7c3, 0x3f57f7d1, 0x3f128f66, 0x3eb8db2c,
0x3f1768ed, 0x3f2f1311, 0x3f7e06ce, 0x3f539971, 0x3e36d1bc, 0x3f21276d,
0x3f4c7853, 0x3be14a80, 0x3e241fa8, 0x3f68cd85, 0x3f5dadd7, 0x3da7f6e8,
0x3f1e2519, 0x3f32df33, 0x3f74f0f1, 0x3f6dd595, 0x3f539f6c, 0x3f043a4a,
0x3f7d281d, 0x3f7c2902, 0x3ea4c72a, 0x3f44af85, 0x3f28cbf4, 0x3f6afd3b,
0x3f34f8ff, 0x3e95bc74, 0x3f2167a4, 0x3ef247ee, 0x3e99cb86, 0x3ea68870,
0x3f25b107, 0x3f5dbbef, 0x3f58b045, 0x3ebe5cb0, 0x3f1e7667, 0x3f186432,
0x3f60e65a, 0x3f679bba, 0x3de9cd40, 0x3cb8fd20, 0x3f3b2e03, 0x3f35b46f,
};
// 4,6,6,7
uint32_t kernel_vals[] = {
0x3c843120, 0x3d9cb687, 0x3d5b919e, 0xbd96186c, 0x3d58fa3a, 0x3d2d416a,
0xbd0d67ab, 0x3d843683, 0x3d71186a, 0xba010e00, 0xbd9f03ba, 0x3d06df00,
0xbc1a3998, 0x3d79f232, 0x3daa7307, 0x3d925f53, 0xbb375d80, 0x3d134948,
0xbd1e015e, 0x3d34a30a, 0x3d18cc42, 0xbd3504c0, 0x3cf0d59c, 0x3b31aa80,
0x3d82c6a5, 0xbd98537c, 0x3cd30210, 0xbd32858e, 0x3d962b29, 0xbd041de8,
0x3d5905ca, 0xbd9318dc, 0xbc822da4, 0xbd99ca93, 0xbca3e600, 0x3d8da64d,
0xbd764cd8, 0x3c444220, 0xbd7b8ddb, 0x3d32a706, 0x3c441368, 0xbd3e4fd3,
0xbc691d58, 0xbd96f41f, 0x3da60aeb, 0xbd4b25de, 0xbd95ae4c, 0x3d577b42,
0x3d946765, 0x3d5dfee2, 0xbd1e98c3, 0x3d08d7f8, 0x3cda02c0, 0x3d3aa8ca,
0x3d621622, 0x3cdba8cc, 0xbd0418ee, 0x3d98a03d, 0x3d872eed, 0xbd827dc6,
0xbc851910, 0xba82bcc0, 0xbda376c0, 0x3d9e0c99, 0xbd82fce2, 0x3a965840,
0xbcb0ab80, 0xbc87e814, 0x3b674180, 0x3d722b1a, 0xbd8ff94b, 0xbd24e630,
0x3da4799d, 0x3d207270, 0xbcb18006, 0x3cc6e480, 0x3d07dcb2, 0xbd27e0b8,
0x3ce3646c, 0x3cfd7400, 0x3b931450, 0x3c9d81c0, 0x3d827fff, 0xbcb4a356,
0x3d90e22b, 0xbd5dc973, 0xbd93ad6e, 0x3d86e28b, 0xbcfe8596, 0x3a359380,
0x3c8cefc0, 0x3d1e98e8, 0x3d92a301, 0xbce5a52c, 0xbc7dc138, 0xbd70686e,
0x3d67f49a, 0x3c5fb808, 0x3d6cfd3a, 0xbd762403, 0x3d91afd3, 0xbcac63d6,
0xbca5e2f0, 0x3da11785, 0xbc47cad8, 0x3d686e6a, 0xbd9d768b, 0xbbd3e1c0,
0x3d29cdf8, 0x3c8e1a50, 0xbbdf52c0, 0x3d84ca75, 0x3d558672, 0x3cb35b44,
0x3d3ae79a, 0xbba14450, 0x3c4f23e8, 0xbd92a8e0, 0xbc7e1008, 0x3d0aef56,
0xbd1ee9c0, 0x3c1ef9e8, 0xbd816bee, 0x3d5d5a0e, 0x3cf05d90, 0x3d945d4b,
0xbd7c9058, 0xbd84b6db, 0xbd22a31b, 0x3cab977c, 0x3d937d2f, 0xbc86fb94,
0xbd9aefe6, 0x3d965a17, 0xbc85e5cc, 0xbd8383cf, 0x3d3ed7da, 0xbd93875c,
0x3ca917f0, 0x3d27d858, 0xbca242f0, 0xbdaa27de, 0xbd41e7de, 0x3cd140cc,
0x3da4c293, 0xbd9d1c4f, 0xbd8573b4, 0xbd12fe33, 0xbd4da3e8, 0xbd9c6e88,
0xbc53e6e8, 0xbd3007c6, 0xbda06ec6, 0xbbe3b240, 0x3cc78960, 0xbb067e00,
0xbc596918, 0xbbda3d70, 0x3d00ebbe, 0x3d104f7a, 0xbc8a715c, 0x3bb9fa70,
0xbd9b2ed4, 0xbd29cdb6, 0xbb7bdc20, 0xbda18690, 0x3d8aa6f3, 0xbd8896d6,
0x3af500c0, 0x3da33325, 0x3d688536, 0xbcf6ccb6, 0xbd7d58b0, 0x3cfffc2c,
0xbc37caf8, 0xbd80b8fa, 0xbcfc3a1c, 0xbd60d596, 0x3d316982, 0xbc6309a8,
0xbd838248, 0xbce41de6, 0xbd779b3b, 0xbd853a67, 0x3d999799, 0x3d863273,
0xbc750968, 0xbd2b6248, 0xbbebcf40, 0xbca76e64, 0x3d714b62, 0x3bed9700,
0xbd86f7ae, 0x3d2196fa, 0xbb24a280, 0x3ceb63fc, 0xbd0a6a16, 0xbd256e18,
0xbd8f2f32, 0xbd2632de, 0xbd6850db, 0xbd02f30b, 0x3d4e678e, 0xbc3a5be8,
0x3da842a5, 0xbd004ba0, 0xbc3ad7c0, 0xbbfebf80, 0xbce8a4d6, 0xbd07428e,
0xbc3f40e8, 0x3ca5ae7c, 0x3ceca810, 0xbd2a1138, 0xbd9a6d3c, 0xbd7f63d0,
0x3c99953c, 0xbd90eb57, 0x3d2dc97e, 0xbd69bda6, 0x3d8f2d3b, 0xbc447de8,
0x3d6e7a9a, 0x3c0de408, 0x3c6a67e0, 0xbcb53cac, 0xbcb237ac, 0xbc6d7660,
0xbd99aa56, 0x3ce2c874, 0x3d9807fb, 0x3ccfee9c, 0xbd55f0f3, 0x3d9c9a4d,
0xbcae07e6, 0x3d2915ba, 0xbcda6406, 0x3c906df0, 0xbcc83906, 0xbd36f4d6,
0x3d923683, 0x3d8667cf, 0x3da75c75, 0xbc8bf5a0, 0x3d98efe1, 0x3cdf5530,
0x3d1c689a, 0x3d6335f6, 0xbd842942, 0xbda390a0, 0x3d52fe0e, 0xbcd93720,
0x3b76c480, 0xbd769e0e, 0xbd5e580b, 0xbc9c6a80, 0x3d52ce0e, 0x3da14773,
0x3cb84a70, 0xbd2e62a3, 0xbc04cf60, 0xbd89c570, 0x3bc6d770, 0xbd031beb,
0xbd2739c3, 0x3da4af81, 0xbc8278cc, 0xbb758b60, 0x3c92de7c, 0xbd3cfcd0,
0xbcd8cdc6, 0x3d514702, 0x3d77b8a6, 0x3d007242, 0xbd089b6b, 0x3cdfe96c,
0x3d019e30, 0xbc45f238, 0xba8bdc40, 0x3d15f902, 0x3c68bd00, 0x3ce60520,
0xbd35ca38, 0x3c5ca7c0, 0x3d6f2aea, 0x3d908d8d, 0x3d108622, 0x3d808301,
0xbccbab96, 0xbd9517e8, 0xbd1a1a9e, 0xbd8656ab, 0xbc53a0e0, 0x3d9c6e5d,
0xbcc3dbac, 0xbd946d5c, 0x3d7fea56, 0xbc807d7c, 0x3cf08e5c, 0x3c29b0e8,
0x3d90cc99, 0x3c8a5c84, 0xbd676793, 0x3c5a74e8, 0x3bd0f9d0, 0x3cb3ad1c,
0x3d918bc7, 0xbd13a94e, 0x3d8deccd, 0x3cbc253c, 0x3d98008b, 0xbd183573,
0x3cfa6ed0, 0xbd0c2a7b, 0xbcf67e00, 0xbd20b236, 0xbd0a6de3, 0xbd9db21e,
0x3d88543b, 0xbcdb5996, 0xbd51acd8, 0x3d64ed4e, 0x3c90d3d4, 0x3d287db2,
0x3c4da880, 0xbbbddfc0, 0xbd4532e3, 0x3d9fc6c5, 0x3cca5754, 0x3cd4b7d4,
0x3c59de38, 0xbcebf0fc, 0x3d15333a, 0x3cf11144, 0xbbc5d4d0, 0x3ba666d0,
0x3d878075, 0x3d7cfa7a, 0xbd2e5538, 0x3d9d7023, 0x3c15b2a8, 0xbd9ea2e4,
0x3b2f2d20, 0x3d96e0c7, 0x3c12a888, 0xbbf2f780, 0x3a1e5180, 0xbd82f792,
0xbd5499b8, 0x3d0988e6, 0x3bbd3c40, 0x3ca2d0f4, 0xbd214ade, 0xba48f600,
0x3d1398b2, 0x3d088956, 0xbd396bc8, 0x3d0a9f36, 0x3d7298e6, 0x3c7ee2c0,
0xbd8b436e, 0x3d2533b0, 0x3d00c956, 0x3cc59610, 0x3d90e525, 0x3da28ad9,
0xbd9a6c17, 0xbb412ce0, 0x3d212ece, 0xbcab2ff6, 0x3d2d7bc6, 0x3c140260,
0xbcf299ec, 0xbd9c39df, 0xbcc3b556, 0xbca145c0, 0x3c91e144, 0x3d68b302,
0xbb8eba80, 0x3d9cbb9f, 0x3da20deb, 0xbd4f8996, 0x3d8817e5, 0xbd451a30,
0xbc2489b8, 0x3d615c9e, 0xbd1eeff0, 0xbd997632, 0x3d6bd0ca, 0x3da5d6ad,
0xbbbc2c40, 0x3d8f81dd, 0xbc95bc60, 0x3c8441fc, 0x3d1f521e, 0xbd3a9aae,
0xbd64f95b, 0xbd3c07ae, 0xbd8ba57b, 0x3c5915e8, 0x3d0441f6, 0xbc5d2880,
0xbd46130b, 0x3b99d490, 0xbd535633, 0xbce4b076, 0x3da31483, 0xbd100a10,
0x3d529016, 0x3d776636, 0x3c7266a8, 0xbd447ce6, 0x3c39d588, 0x3d8a88c1,
0xbd6102e0, 0xbd4c0f96, 0xbd96a177, 0x3d63fb9a, 0x3da48ae1, 0xbc077de0,
0x3d0d0a12, 0xbd4e8c08, 0xbd8836e0, 0x3c4003f8, 0x3b867570, 0x3d249868,
0x3d7159fa, 0x3cd562cc, 0xbd770406, 0x3d311d2a, 0x3d80deab, 0xbc89604c,
0xbd03fa18, 0xbc3ed040, 0xbd879210, 0xbd853ca2, 0xbc3a2820, 0x3ca8a814,
0xbd707186, 0xbd4ebae0, 0xbd285338, 0x3d44252a, 0x3d43a6ea, 0x3cc93b7c,
0x3d31e60a, 0xbd83f6fc, 0xbcdc9816, 0x3b56cd60, 0x3b9d9870, 0xbd498360,
0x3c890e2c, 0x3da91cb9, 0x3d3b95aa, 0x3d3a85da, 0xbd99b3e4, 0x3be1f540,
0xbd9dae9b, 0x3c377bb8, 0x3d8ad909, 0x3d7ea3ba, 0x39b0fb00, 0xbc559880,
0x3d12fd0e, 0x3caeb8b0, 0x3d30ec4a, 0x3cc17f34, 0x3da66999, 0x3c9d8bd4,
0xbc49a868, 0xbc7262c8, 0xbd575d66, 0x3d53849a, 0x3d20d3aa, 0xbd3eaa60,
0xbd3ae560, 0x3d8c6e43, 0x3d5f4c2a, 0x3da44397, 0xbd4126fb, 0x3d1a521e,
0xbcc23d56, 0x3d3aa852, 0x3ce978bc, 0xbd3873de, 0xbc7f1120, 0xbd9cbb92,
0xbd2e5fc3, 0x3d44b92e, 0xbd1cecc6, 0x3c1dae28, 0x3da088e3, 0xbd572ea8,
0x3da278a1, 0xbd427378, 0xbafe5a00, 0xbd1732bb, 0xbda94f20, 0x3c960d6c,
0x3d5e8046, 0xbc9b8540, 0xbcd4d6bc, 0x39561200, 0xbc11d0e0, 0x3d41fe0e,
0x3d9b803d, 0xbca97a70, 0xbb9b3cb0, 0xbd294f7b, 0xbd628373, 0x3d289706,
0x3cdbf240, 0xbd41cdce, 0x3cecef54, 0x3d90fa4d, 0xbc888134, 0xbc5a2918,
0xbce128e6, 0xbcb35750, 0x3d63d15a, 0x3d930435, 0xbba63e90, 0x3da11179,
0x3bae9d40, 0xbba646f0, 0xbd4a1e16, 0xbd5c64bb, 0xbd49d440, 0x3c643d48,
0x3b8e4b70, 0x3da8028d, 0xbd2bd5cb, 0xbd8d052a, 0xbd49def0, 0xbd673928,
0x3d25a522, 0xbd975f9f, 0x3ce6f274, 0xbd2083b3, 0xbd9ef9ef, 0x3da66731,
0xbd9d9aa8, 0x3ce37d8c, 0x3c248e08, 0xbccab67c, 0xbd0598d8, 0xbd03547e,
0x3cf5786c, 0x3d4a7152, 0xbd93b99f, 0xbbaa4240, 0xbd92ad06, 0xbda84d06,
0xbd7fab28, 0x3da9bac9, 0x3be5d250, 0x3b5ff2e0, 0xbcc6309c, 0xbd87240b,
0x3d5bfcfe, 0xbc884a9c, 0x3c4008c8, 0x3d264cae, 0x3d91e3b3, 0x3d8f85b9,
0xbd8c734b, 0xbd79c950, 0x3d3b9e9a, 0x3d1eb410, 0x3cf60a2c, 0xbd73f176,
0xbd6e1b78, 0x3d921101, 0x3a9b9ec0, 0x3ce062bc, 0x3a38ee00, 0x3da89639,
0xbda2fe80, 0xbd66eb48, 0x3cbeca5c, 0xbd805854, 0xbd961532, 0xbd771aa3,
0xbd80bab3, 0x3d820b2d, 0x3d893ed5, 0x3d6cc3f2, 0xbd579be0, 0x3d4aff3a,
0xbdaa21ab, 0xbd3f0063, 0x3d309d5a, 0xbc76d488, 0xbc947420, 0xb9d77d00,
0xbcf10e30, 0xbd95628c, 0xbd0a576b, 0xbbc7eb90, 0x3d9e278d, 0xbd93adc3,
0x3ce18a20, 0x3cae06f0, 0xbbef3980, 0x3c951ac4, 0x3d3e7aea, 0xbc8b84cc,
0x3d1d235e, 0xbb533c80, 0xbcf53716, 0xbb5518e0, 0x3d09b736, 0x3c4245a0,
0x3bac8070, 0xbd3c4a9b, 0xbd17e246, 0xbd5e6703, 0xbd8cf436, 0xbd26ed80,
0x3d86dd01, 0xbd3953de, 0x3d9f7ecd, 0x3da99971, 0x3d8142d9, 0x3d77f962,
0x3d8e694d, 0x3c8f9720, 0x3d29f5f0, 0x3da2b95d, 0x3d441176, 0x3d954c0d,
0xbd8d7ca2, 0x3d691db2, 0xbcfa8bb6, 0x3d82379f, 0x3da5ea69, 0x3d4b5ffe,
0xbd80ba9f, 0xbd6b7473, 0x3cf3ae6c, 0x3d48c2de, 0xbd309196, 0x3cc590e4,
0xbbfbd930, 0xbc531940, 0xbce4b526, 0xbd3e999e, 0xbd30845e, 0x3d1114ca,
0xbc801fc4, 0x3d963419, 0x3d9e1e59, 0xbd14bd60, 0x3d8d42f1, 0x3d4d7052,
0xbd252866, 0x3d9e72d7, 0xbd13bfa0, 0x3d5877ce, 0x3d2382b8, 0xbd2090c3,
0xbd89eac2, 0xbd8b6e5b, 0x3c407860, 0x3ba1f480, 0xbbc0e000, 0xbd23f85b,
0xbd56f7c6, 0x3da21681, 0xbd89f284, 0x3c250938, 0xbc8da464, 0x3d0af776,
0xbda13d8a, 0x3daa67cb, 0x3abe13c0, 0x3d7437f6, 0xbd91eb66, 0x3d3c8f82,
0x3c07c488, 0x3d48e0ca, 0xbcd5b28c, 0xbd940127, 0x3c6ad8c8, 0xbcbb58a6,
0xbd891232, 0xbcb343b0, 0x3c9e3bfc, 0x3bc78cf0, 0x3d9a7bd1, 0x3c332128,
0x3d468eea, 0x3c9bdc54, 0xbca528ac, 0x3d85f30b, 0xbd9feb7e, 0x3d6ef656,
0x3c987604, 0xbd0e4006, 0x3c0fe698, 0x3d98f085, 0x3c88284c, 0x3d7a6ac2,
0xbd232328, 0xbcf65dd6, 0x3d8b1c87, 0x3c8579b0, 0xbd6c9a18, 0xbb8c4070,
0xbce53460, 0xb966da00, 0x3d96bf7f, 0xbd8e6da4, 0xbce5f490, 0x3d52ab1a,
0xbda75136, 0x3d4eb822, 0xbcd06aec, 0xbb83bd70, 0x3d0cd1ee, 0x3b944bc0,
0xbd85cd7e, 0x3d852373, 0xbd83b8ee, 0x3ba8cdb0, 0x3cd6a290, 0xbd38d6cb,
0xbd3d1808, 0xbc85171c, 0x3da3c7e5, 0xbd739646, 0x3c8ae160, 0x3d21d400,
0x3ccb75c0, 0xbc889df0, 0xbc72fc28, 0xbc17e118, 0xbd7a7630, 0xbc61a280,
0x3d0696ae, 0xbd8153da, 0x3d0f75ae, 0x3c39d300, 0xbd4ec36b, 0x3d87b79f,
0x3d303cca, 0xbd040c7e, 0x3c61e480, 0xbd467ac3, 0x3c94ba84, 0xbd458a26,
0x39c9c400, 0x3da4a66d, 0xbcbe4bb0, 0x3d0eb460, 0x3c9431b4, 0xbd8442ea,
0xbd8dfeff, 0xbc699a60, 0xbc989a74, 0xbd2eeb46, 0x3caecdb4, 0xbda8bd30,
0x3d820281, 0x3cb36bec, 0x3cea7350, 0x3d5f5bf6, 0x3d30c10a, 0x3d1d6ace,
0x3d41da46, 0x3c8bb4b0, 0xbc60f2b8, 0x3d9cdd91, 0x3d91abb9, 0xbd1b2748,
0xbc59dff8, 0xbc775560, 0x3d493bb6, 0xbd63f4ce, 0x3d5ff1ae, 0x3d979a9b,
0x3c82ebfc, 0x3d746e3a, 0xbd2269a3, 0x3d8ee721, 0xbd82397e, 0xbd9c315f,
0xbd39c52b, 0xba1c2e80, 0x3cdbe304, 0x3d1f1e42, 0x39e59000, 0x3d613952,
0xbda8895f, 0x3c5c46c8, 0x3cae243c, 0xbc63e420, 0x3c691308, 0x3d9d74d5,
0xba9f3440, 0x3c329100, 0xbcaeb7ec, 0xbd43f623, 0xbd1088e8, 0x3d9526a1,
0xbd1756e8, 0xbabed200, 0xbce877e0, 0x3d6c38c6, 0xbd4f0708, 0x3d855191,
0xbd1f726e, 0xbd3c8a5b, 0x3bf92500, 0xbd9791e2, 0x3da2554d, 0x3da04af7,
0xbbf66f80, 0xbd4b6d36, 0x3d6f47ea, 0xbc42df68, 0x3cb09534, 0xbba09dc0,
0xbd86382b, 0xbd70b530, 0x3d35f66e, 0x3aeb94c0, 0xbc878d1c, 0xbc168020,
0x3cb30270, 0x3cc2b0f0, 0xbc180f08, 0xbda1d4ee, 0xbd4ccd98, 0xbd894473,
0xbd9af53e, 0xbb0c2d60, 0x3d17f51e, 0xbd7b7eee, 0xbc5da258, 0xbd4dd2a3,
0x3da186fd, 0x3d134758, 0x3d193cc2, 0xbd115248, 0xbd8bf6a3, 0x3d031b48,
0xbd69ef46, 0xbc223768, 0xbcca49fc, 0x3c20e208, 0xbc83cb20, 0x3c1916b8,
0xbd8399a2, 0xbd8255ca, 0xbda299d2, 0x3d914165, 0xbcc6bf0c, 0x3a114400,
0x3cd1da4c, 0x3d4228c2, 0x3d7670c6, 0xbd1e2430, 0x3d629c22, 0xbd4473c0,
0x3d7feb8a, 0x3d9cdef1, 0x3cca8d20, 0x3d56a59a, 0xbd00ebd6, 0x3ccd2300,
0x3c0c9fa8, 0x3d5b419a, 0xbc144938, 0xbcc62d7c, 0xbda34f16, 0xbd173876,
0xbd8a6fd3, 0x3cbecc6c, 0x3cee48a4, 0x3c92f5bc, 0x3d8ea19d, 0x3d4007c2,
0xbd5ce9b0, 0xbd44c9a3, 0xbbdc8c40, 0x3d940e0f, 0x3d0eed0a, 0x3d055578,
0x3ba45490, 0x3c31d480, 0x3d755952, 0xbbe273d0, 0xbb1f86e0, 0xbd2a17e6,
0xbd3b3688, 0xbd85c086, 0x3cae4764, 0x3da0ae0d, 0x3d8840e1, 0x3d583022,
0xbd31600b, 0x3ab47ec0, 0x3d7743b2, 0xbcebc4c0, 0xbcaca0a6, 0x3d1f8a70,
0x3d8fa7f7, 0x3da7431d, 0x3d83c1fd, 0x3d2a48ee, 0xbd0b1903, 0x3c50cd78,
0x3bc170f0, 0xbd6136a3, 0xbd80455c, 0xbc50a718, 0x3c9e07a4, 0x3c6be758,
0xbc4dd160, 0xbd8690a4, 0x3d694d86, 0x3aff6500, 0x3d50db16, 0x3cd51a0c,
0x3d0f99b0, 0xbd1f047b, 0x3d6936aa, 0x3d8abc69, 0x3d0001c8, 0x3d4db9ae,
0xbd20db0e, 0xbd2bbe13, 0x3d7f21f2, 0x3cf3f014, 0xbbf91340, 0x3d39be8a,
0xbce444ec, 0x3da7e361, 0xbd123e96, 0xbd253e5e, 0xbc4a61f8, 0x3d7d283a,
0xbcb0f940, 0xbd3055bb, 0xbda15e84, 0xbd8c7f2f, 0x3daa2d09, 0x3b6c8920,
0xbd990704, 0xbd6569b8, 0x3c041668, 0xbd8e3924, 0xbd94a413, 0xbd420e3e,
0xbd396483, 0xbd7490ab, 0x3da40ca1, 0x3d48e22a, 0x3aa71100, 0xbd02a233,
0xbcf71146, 0x3d8b2c4d, 0xbd1a098e, 0x3aa1dcc0, 0x3ca652d4, 0x3bc7ce30,
0xbd1bbdb6, 0xbda6f36e, 0x3d09e0f0, 0x3c87be40, 0xbb0ed7e0, 0x3d49bbf2,
0xbd4c58f8, 0x3da8bae1, 0xbda70c08, 0x3c911f2c, 0xbd927990, 0xbceb18dc,
0xbd17b05b, 0x3b1281e0, 0xbca1db74, 0xbd9849da, 0x3c5786f8, 0xbc909ac4,
0x3d1c371a, 0x3b9e3a50, 0x3d3c374e, 0x3d883bd5, 0x3d056576, 0xbd86a74b,
0x3d8bc7e9, 0x3d620cb2, 0x3d8c0993, 0xbd88e19e, 0xbd5979e0, 0xbd898838,
0x3d6ee24a, 0xbd14ad70, 0xbd38acf8, 0xbbfe6bd0, 0xbcf2441c, 0x3c12b5f8,
0x3d7d8002, 0x3d9c2885, 0x3d7dabba, 0x3da9cb05, 0xbd21b2ab, 0x3cb0d1f4,
};
// 7
uint32_t bias_vals[] = {
0xbda771f8, 0x3da2ff01, 0xbbafffb0, 0x3d986e05,
0x3b05b520, 0xbd0b22e0, 0x3d55a4aa,
};
// 4,4,2,7
uint32_t output_exp_vals[] = {
0x3aae9b04, 0x3d11f138, 0x3ea6c5e5, 0x3e27faa2, 0x3e1af7ae, 0xbcd55ed5,
0x3d64578e, 0xbe34ca8f, 0xbea1a8b4, 0x3f224d3d, 0x3d8b4f5c, 0x3d5da826,
0x3d5ba7f0, 0x3e837ddb, 0xbc3cbed6, 0xbedabd2f, 0x3e0e76a3, 0x3e5e1078,
0x3eb6b112, 0xbbb0ae9e, 0x3ea6b36e, 0x3e7f83d2, 0xbe01a079, 0x3e566d4b,
0x3ecdcb4c, 0x3e278bed, 0xbe1ca4ea, 0x3e033ef1, 0x3e36026f, 0xbe814275,
0x3eda68e1, 0x3ece2c47, 0x3e99d145, 0x3de7c10a, 0x3e003eb8, 0x3ddc462f,
0xbf128f10, 0x3ef8c6c4, 0x3e0b4fe8, 0x3e234e9f, 0xbc68b0cc, 0x3ec20083,
0x3e2b8213, 0xbe83f3f4, 0x3f0cedbc, 0x3ecffe92, 0x3eab0eeb, 0x3a5ee371,
0x3eaf1fcd, 0xbdfdd7b5, 0xbe333d06, 0x3e4d244d, 0x3d9c8735, 0x3da1fd6f,
0x3dbdf320, 0x3edff713, 0x3da12ca6, 0xbe418c28, 0x3ea46203, 0x3ed77016,
0x3d4e8755, 0x3e85bec4, 0x3ec9875d, 0x3e7c02c8, 0xbed5c916, 0x3edba911,
0x3e9fc87d, 0x3e3d0777, 0x3d3e1d2d, 0x3dd82c20, 0x3e96fc25, 0xbf131fb2,
0x3f6dc404, 0x3e083262, 0x3e8bcd87, 0x3dfb8de2, 0x3db91ad3, 0x3ebcac31,
0xbe9b16af, 0x3ec8e755, 0x3f036f70, 0x3e78d3e5, 0x3cc1b424, 0x3e73c23c,
0x3d8a27e0, 0xbea947af, 0x3f006376, 0x3e3b6c9c, 0x3e145c90, 0x3b90ce9d,
0x3e8ef2c4, 0xbca2e459, 0xbe94037d, 0x3ee2e324, 0x3e83fadd, 0x3cdd4517,
0x3e2722a4, 0x3e85ec31, 0x3dde5094, 0xbef14213, 0x3f1f8519, 0x3ea5cc4d,
0x3e96bce1, 0xbbc4f4a2, 0x3e86b220, 0x3d91f714, 0xbf0b7eea, 0x3ede3416,
0x3ebd898b, 0xbdc5e339, 0x3d645a8c, 0x3ecd54ea, 0xbe7270b7, 0xbe2a28d0,
0x3efe2f4f, 0x3ecdd1bb, 0x3eb541ae, 0x3df78530, 0x3e2ff48b, 0x3ecf015e,
0xbdfc62ab, 0x3f306492, 0x3e32bcf1, 0x3d46d562, 0x3e26ff13, 0x3ebb5471,
0x3e0fdbb5, 0xbe8821f2, 0x3e9b86b6, 0x3dab64b8, 0x3e64597d, 0xbdbbcdc7,
0x3e3f9585, 0x3de4346a, 0x3dcbc01b, 0x3e5b0824, 0x3e80aced, 0x3e3d057f,
0xbdb1dc06, 0x3ed614bf, 0xbe4b9925, 0xbea5e903, 0x3efbac65, 0x3f0429b1,
0x3e397d7b, 0x3ef9017d, 0x3f09e093, 0x3cee88fd, 0xbf13fa56, 0x3ea66d86,
0x3ec4d5e4, 0x3deb3387, 0xbe3fe165, 0xbe29dbc3, 0x3e006749, 0xbe356e13,
0x3f4c4463, 0xbd05aef1, 0x3e5cacf5, 0x3bb3f58a, 0x3f0276b4, 0x3e19ffaf,
0xbefbb77f, 0x3f02c7a0, 0x3ed7c512, 0x3da438e7, 0x3e27f543, 0x3e0d4b61,
0x3e462486, 0xbeece080, 0x3f2434e2, 0x3d412397, 0x3d27f66b, 0xbe8014f3,
0x3e6c1353, 0xbe6eaff0, 0xbeba0a82, 0x3ed479f9, 0x3ea08d22, 0x3e3a5d62,
0xbe168c35, 0xbb8818a5, 0x3eb64d64, 0xbe94a7f4, 0x3ef4553a, 0x3e1b35e2,
0xbb866309, 0x3c700992, 0x3ee4e2bc, 0xbe081632, 0xbe209e82, 0x3ef13307,
0x3f0aaf13, 0x3d33f762, 0x3db0d374, 0x3dac7411, 0x3de43756, 0xbea7e814,
0x3e9bfcf1, 0x3e985384, 0x3e92c2f7, 0xbe351877, 0x3e0cf9db, 0xbea20a24,
0xbf169121, 0x3f10234e, 0x3e807156, 0x3e6978b4, 0xbd83f065, 0x3ecb7fbb,
0xbce91195, 0xbe653e1a, 0x3eef12cc, 0x3ded14a4, 0x3d0345ca, 0x3dbafae5,
0x3ebe95f8, 0x3e1207d8, 0xbeea224b, 0x3eea7e97, 0x3f063448, 0x3e843290,
0x3da4e66c, 0xbdcffdd6,
};
// 4,4,2,7
uint32_t output_relu_exp_vals[] = {
0x3aae9b04, 0x3d11f138, 0x3ea6c5e5, 0x3e27faa2, 0x3e1af7ae, 0x0,
0x3d64578e, 0x0, 0x0, 0x3f224d3d, 0x3d8b4f5c, 0x3d5da826,
0x3d5ba7f0, 0x3e837ddb, 0x0, 0x0, 0x3e0e76a3, 0x3e5e1078,
0x3eb6b112, 0x0, 0x3ea6b36e, 0x3e7f83d2, 0x0, 0x3e566d4b,
0x3ecdcb4c, 0x3e278bed, 0x0, 0x3e033ef1, 0x3e36026f, 0x0,
0x3eda68e1, 0x3ece2c47, 0x3e99d145, 0x3de7c10a, 0x3e003eb8, 0x3ddc462f,
0x0, 0x3ef8c6c4, 0x3e0b4fe8, 0x3e234e9f, 0x0, 0x3ec20083,
0x3e2b8213, 0x0, 0x3f0cedbc, 0x3ecffe92, 0x3eab0eeb, 0x3a5ee371,
0x3eaf1fcd, 0x0, 0x0, 0x3e4d244d, 0x3d9c8735, 0x3da1fd6f,
0x3dbdf320, 0x3edff713, 0x3da12ca6, 0x0, 0x3ea46203, 0x3ed77016,
0x3d4e8755, 0x3e85bec4, 0x3ec9875d, 0x3e7c02c8, 0x0, 0x3edba911,
0x3e9fc87d, 0x3e3d0777, 0x3d3e1d2d, 0x3dd82c20, 0x3e96fc25, 0x0,
0x3f6dc404, 0x3e083262, 0x3e8bcd87, 0x3dfb8de2, 0x3db91ad3, 0x3ebcac31,
0x0, 0x3ec8e755, 0x3f036f70, 0x3e78d3e5, 0x3cc1b424, 0x3e73c23c,
0x3d8a27e0, 0x0, 0x3f006376, 0x3e3b6c9c, 0x3e145c90, 0x3b90ce9d,
0x3e8ef2c4, 0x0, 0x0, 0x3ee2e324, 0x3e83fadd, 0x3cdd4517,
0x3e2722a4, 0x3e85ec31, 0x3dde5094, 0x0, 0x3f1f8519, 0x3ea5cc4d,
0x3e96bce1, 0x0, 0x3e86b220, 0x3d91f714, 0x0, 0x3ede3416,
0x3ebd898b, 0x0, 0x3d645a8c, 0x3ecd54ea, 0x0, 0x0,
0x3efe2f4f, 0x3ecdd1bb, 0x3eb541ae, 0x3df78530, 0x3e2ff48b, 0x3ecf015e,
0x0, 0x3f306492, 0x3e32bcf1, 0x3d46d562, 0x3e26ff13, 0x3ebb5471,
0x3e0fdbb5, 0x0, 0x3e9b86b6, 0x3dab64b8, 0x3e64597d, 0x0,
0x3e3f9585, 0x3de4346a, 0x3dcbc01b, 0x3e5b0824, 0x3e80aced, 0x3e3d057f,
0x0, 0x3ed614bf, 0x0, 0x0, 0x3efbac65, 0x3f0429b1,
0x3e397d7b, 0x3ef9017d, 0x3f09e093, 0x3cee88fd, 0x0, 0x3ea66d86,
0x3ec4d5e4, 0x3deb3387, 0x0, 0x0, 0x3e006749, 0x0,
0x3f4c4463, 0x0, 0x3e5cacf5, 0x3bb3f58a, 0x3f0276b4, 0x3e19ffaf,
0x0, 0x3f02c7a0, 0x3ed7c512, 0x3da438e7, 0x3e27f543, 0x3e0d4b61,
0x3e462486, 0x0, 0x3f2434e2, 0x3d412397, 0x3d27f66b, 0x0,
0x3e6c1353, 0x0, 0x0, 0x3ed479f9, 0x3ea08d22, 0x3e3a5d62,
0x0, 0x0, 0x3eb64d64, 0x0, 0x3ef4553a, 0x3e1b35e2,
0x0, 0x3c700992, 0x3ee4e2bc, 0x0, 0x0, 0x3ef13307,
0x3f0aaf13, 0x3d33f762, 0x3db0d374, 0x3dac7411, 0x3de43756, 0x0,
0x3e9bfcf1, 0x3e985384, 0x3e92c2f7, 0x0, 0x3e0cf9db, 0x0,
0x0, 0x3f10234e, 0x3e807156, 0x3e6978b4, 0x0, 0x3ecb7fbb,
0x0, 0x0, 0x3eef12cc, 0x3ded14a4, 0x3d0345ca, 0x3dbafae5,
0x3ebe95f8, 0x3e1207d8, 0x0, 0x3eea7e97, 0x3f063448, 0x3e843290,
0x3da4e66c, 0x0,
};
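// Run the convolution and compare against both expected-output arrays
// (raw outputs and their ReLU-activated counterparts) under VALID padding.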
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
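// Conv2D test case: VALID padding with the zero-strides configuration over
// the large input set (input dims 4,15,10,6).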
void test_valid_padding_zero_strides_large() {
input_set *set = &large_input;
strides_input_set *strides = &zero_strides;
// 4,15,10,6
uint32_t input_vals[] = {
0x3dde9a10, 0x3f5666b9, 0x3e8a80fa, 0x3e0e30e4, 0x3ebd0716, 0x3f22510c,
0x3f0f0b05, 0x3f0b527d, 0x3f46e8f1, 0x3f0e2236, 0x3f3843d9, 0x3f11a3bb,
0x3e53223c, 0x3e9ebd48, 0x3f7f5de3, 0x3ed7b118, 0x3d040570, 0x3e91fc24,
0x3f65f617, 0x3e86c634, 0x3d95c918, 0x3f7380cd, 0x3f234774, 0x3edabe94,
0x3e7135f4, 0x3e350480, 0x3e5e08cc, 0x3f2f8802, 0x3f6d3d5b, 0x3ed9ee36,
0x3eccc264, 0x3ea07fe0, 0x3f7c6112, 0x3f2b105b, 0x3ebd523c, 0x3f315182,
0x3f39ff9c, 0x3e83e828, 0x3f62ed12, 0x3f5dfc1c, 0x3ef1d4fe, 0x3b973980,
0x3f25010e, 0x3df4f550, 0x3f216f1a, 0x3e72ac50, 0x3f3f925f, 0x3ef34a1c,
0x3f1514ea, 0x3f1912bd, 0x3f7518ff, 0x3f560cc0, 0x3ee8d69a, 0x3f28636f,
0x3ef8336a, 0x3e366224, 0x3efab474, 0x3f00be28, 0x3efabf90, 0x3f268971,
0x3ecc927e, 0x3efa2e24, 0x3f1e3dea, 0x3d25ae80, 0x3f28c692, 0x3e699f80,
0x3e892528, 0x3f5c0fa4, 0x3f3d0b21, 0x3f504cea, 0x3eec2d20, 0x3ea48b0c,
0x3f7c7068, 0x3e2b6d44, 0x3f7788a7, 0x3e275054, 0x3efed888, 0x3f4bfe3c,
0x3f6120fe, 0x3f504c76, 0x3dff1a98, 0x3f4e8175, 0x3ef0831c, 0x3f1ddffc,
0x3e82330a, 0x3dc33bc8, 0x3f394b53, 0x3f634e17, 0x3f47ebf7, 0x3f7f3c84,
0x3f4cd033, 0x3d9d2098, 0x3f341604, 0x3eed28a2, 0x3f763a8b, 0x3ea42184,
0x3f29d214, 0x3f7b3dc8, 0x3e94f71c, 0x3eabbd3a, 0x3f0b6fd7, 0x3f46fac2,
0x3e276790, 0x3e82797c, 0x3e92c996, 0x3f1592e2, 0x3f12d101, 0x3edcf45a,
0x3e86d9bc, 0x3f6d4119, 0x3f30d665, 0x3f5fad7a, 0x3e13afb0, 0x3f144cd9,
0x3efede78, 0x3f72d999, 0x3e4f1154, 0x3f40f5ea, 0x3f474e3b, 0x3efa4892,
0x3e5460cc, 0x3f23568b, 0x3f450c05, 0x3f61a5aa, 0x3f4859d2, 0x3cd13f40,
0x3e3f6d04, 0x3e805646, 0x3f53dfe9, 0x3ef89136, 0x3e0add1c, 0x3f33e7df,
0x3efae34a, 0x3f2113b1, 0x3f3ed68e, 0x3dfa3530, 0x3f4b139a, 0x3f233a7e,
0x3d8516b0, 0x3f1aa364, 0x3e66ff18, 0x3f38dcf6, 0x3f231575, 0x3d83d8c0,
0x3e2fe2e0, 0x3f1aa7d5, 0x3f78784e, 0x3f096b77, 0x3e45bb30, 0x3ef7329e,
0x3f145b96, 0x3f0ff17d, 0x3f30c586, 0x3f1e8e09, 0x3ed5ce52, 0x3f17f212,
0x3ee5824a, 0x3f7dc58f, 0x3c189280, 0x3f772b3a, 0x3d60d290, 0x3f67010e,
0x3f3d57cf, 0x3f11a4b3, 0x3c8f9220, 0x3ec55dfa, 0x3f1152b0, 0x3f7e784b,
0x3f5b8914, 0x3f3f87da, 0x3f2d606d, 0x3f7465f2, 0x3f4048d2, 0x3ed29954,
0x3f51c6fa, 0x3ea0a238, 0x3f3b0cd7, 0x3e51a488, 0x3e1e8910, 0x3ed2c5de,
0x3e8d776e, 0x3eabf5c4, 0x3f6f08c2, 0x3e34abe8, 0x3eecc686, 0x3ec6b340,
0x3f0ef530, 0x3f6a2f92, 0x3f6312d6, 0x3f53b437, 0x3f64b769, 0x3e071134,
0x3eaaf75e, 0x3eea6cbc, 0x3f4f7b3c, 0x3f6153a5, 0x3f621982, 0x3f3e978e,
0x3f1f06ec, 0x3f35445c, 0x3eb2f924, 0x3e9ec55e, 0x3f51c216, 0x3f7d51ea,
0x3d2a5290, 0x3e8f57d2, 0x3eeea1a0, 0x3f177ad9, 0x3e6167f4, 0x3f6e0812,
0x3f682c4a, 0x3f18b998, 0x3e3f51dc, 0x3f0eb695, 0x3efa3014, 0x3d83a8f0,
0x3f5400e4, 0x3de51a28, 0x3f044430, 0x3f28dfee, 0x3e848d58, 0x3d1c74a0,
0x3ec975c6, 0x3f11e457, 0x3f1b0942, 0x3ed94f5a, 0x3f727868, 0x3e330edc,
0x3e920154, 0x3f13b95a, 0x3f3a6348, 0x3c5207c0, 0x3f05a886, 0x3ed204f2,
0x3f6a800e, 0x3eea6228, 0x3f1a60d5, 0x3f2abe0b, 0x3e4471d4, 0x3ebdc5c4,
0x3f46891e, 0x3f339bc7, 0x3f173a63, 0x3c6fa340, 0x3edc80ca, 0x3cf6d5a0,
0x3f7c30af, 0x3f44aa42, 0x3f001b8b, 0x3e1f0ad0, 0x3f071aa5, 0x3d830718,
0x3e519f6c, 0x3f5af810, 0x3ddd5948, 0x3dd93078, 0x3f2b2a8f, 0x3dd55958,
0x3e186300, 0x3d32ef30, 0x3e906c9e, 0x3f325c14, 0x3ed906a2, 0x3eb646dc,
0x3d4eb620, 0x3eec02b6, 0x3ec49966, 0x3f37af89, 0x3f15ac66, 0x3f021455,
0x3ed9680a, 0x3f49fa35, 0x3f223794, 0x3e55bda0, 0x3cecfce0, 0x3f7af654,
0x3f00a73a, 0x3f55119d, 0x3f04f474, 0x3f729c90, 0x3eb28c82, 0x3ce7c2c0,
0x3f6852b3, 0x3ddd8638, 0x3e5ff158, 0x3e189898, 0x3f46bbe7, 0x3f4e5dcf,
0x3e769b38, 0x3effedc6, 0x3e88efca, 0x3f5d77a8, 0x3f348d05, 0x3e978342,
0x3d546e00, 0x3ef2d14a, 0x3ec0c2b2, 0x3f38002e, 0x3f7d2946, 0x3f7ea4b8,
0x3f056100, 0x3ed5704c, 0x3f6d2747, 0x3ec7e3f6, 0x3f663e4e, 0x3da77ed8,
0x3f169043, 0x3f36da8a, 0x3f5562d8, 0x3f654053, 0x3f426c9a, 0x3d6bb610,
0x3f233c4c, 0x3f359222, 0x3d8ffb60, 0x3f5e3978, 0x3e83b710, 0x3eda2fc0,
0x3e9adcb2, 0x3d3ad540, 0x3e6a84ac, 0x3f688790, 0x3f737dc8, 0x3f34f35b,
0x3ddcc4f8, 0x3f147b0e, 0x3f2fa60f, 0x3dc02228, 0x3f57c84d, 0x3dba3300,
0x3d0b1030, 0x3e6c3878, 0x3ea7c9de, 0x3ea03e24, 0x3ea43e30, 0x3ec423e2,
0x3f7f26eb, 0x3f420836, 0x3eb02a96, 0x3e159168, 0x3e896ebe, 0x3ddc4ee0,
0x3ebd628e, 0x3efa24b2, 0x3dbdf0b8, 0x3f6638e7, 0x3e80da60, 0x3f492d7d,
0x3ecff6b8, 0x3f163ca6, 0x3ee5d554, 0x3f1cae60, 0x3e3afd54, 0x3e86460a,
0x3f45a3ef, 0x3edc3dc0, 0x3f2c2859, 0x3e7cf8ac, 0x3f334c75, 0x3e842c28,
0x3e7026d0, 0x3f65fdbb, 0x3f719460, 0x3ede5fd6, 0x3e75b1ac, 0x3d8c6be8,
0x3d2c88d0, 0x3daf1b08, 0x3eba86ea, 0x3f586ea8, 0x3f712640, 0x3f26be89,
0x3c52abc0, 0x3f37a1b8, 0x3e9ab7ae, 0x3f778239, 0x3f1cc44e, 0x3f759a65,
0x3ef2481a, 0x3f7c9113, 0x3f17a8e7, 0x3f010e4a, 0x3f0226dc, 0x3f20b226,
0x3f2863ba, 0x3e9adea6, 0x3f0a25ee, 0x3d3f6df0, 0x3f284520, 0x3f320f11,
0x3f3c65be, 0x3f20ed9d, 0x3f0f8492, 0x3e8b1e8c, 0x3f4810fe, 0x3f641106,
0x3e8cdbce, 0x3e8199ca, 0x3d13ded0, 0x3f59d926, 0x3ed58276, 0x3eeebb88,
0x3f095c8e, 0x3e845efc, 0x3f63f1ad, 0x3f137a99, 0x3de7ee08, 0x3efeb994,
0x3f05beda, 0x3e688e04, 0x3efcfb46, 0x3f55867b, 0x3c3e7dc0, 0x3f6d645e,
0x3f4e03a8, 0x3f3f44fe, 0x3eea0742, 0x3f697c49, 0x3f7706f6, 0x3eaa1804,
0x3e4bf2f0, 0x3f528e35, 0x3ead3262, 0x3ea84e78, 0x3f77c29f, 0x3d2ab6e0,
0x3f5e5096, 0x3ede2990, 0x3d20b2c0, 0x3f4a4c64, 0x3ea004aa, 0x3f107192,
0x3ec62bdc, 0x3ee0dd08, 0x3eab5996, 0x3ee2688e, 0x3f70b6ca, 0x3f367c38,
0x3f3703c0, 0x3e0e7294, 0x3d27dbd0, 0x3e8e7d26, 0x3f32af3f, 0x3f2a3f2a,
0x3db1c370, 0x3f4519b3, 0x3f34aa2b, 0x3e1e5c14, 0x3d85acc0, 0x3f5f1e1f,
0x3ea4b136, 0x3f3a3b66, 0x3e736488, 0x3f18ff06, 0x3ed88d1e, 0x3f6afd1d,
0x3e64fbc0, 0x3ef46cb2, 0x3d3e21b0, 0x3f1be89b, 0x3d468400, 0x3f003634,
0x3e842706, 0x3e3e4764, 0x3e7c9e38, 0x3f53fd99, 0x3f378a79, 0x3f4b4832,
0x3de73ed8, 0x3f4036f6, 0x3f321383, 0x3f7d92f0, 0x3f2d9197, 0x3d9fbdd8,
0x3f1d5f3f, 0x3e31b094, 0x3e630d20, 0x3ddcca98, 0x3f3ced3a, 0x3f0dbd5c,
0x3f7d0a1c, 0x3f535d3c, 0x3de89e08, 0x3ed6d14e, 0x3ef5b28e, 0x3f4b3164,
0x3f606410, 0x3dd17730, 0x3e9b5210, 0x3eb28bf4, 0x3f128966, 0x3f4e7f32,
0x3e401670, 0x3f2cee74, 0x3f78534f, 0x3f417f95, 0x3e4ca56c, 0x3cfe2aa0,
0x3ee6706a, 0x3e25c45c, 0x3f46bc4d, 0x3f3e6af8, 0x3dafa298, 0x3f70143c,
0x3ebbbce6, 0x3f0f0a79, 0x3f1e9e36, 0x3f7415e6, 0x3ea5b550, 0x3f044ea3,
0x3e902d2a, 0x3f7bbffe, 0x3ebe17f6, 0x3f58254c, 0x3f2eb2a9, 0x3f0d0d50,
0x3e8fe4b4, 0x3f59fd71, 0x3e978a5a, 0x3f56f198, 0x3ed0adf6, 0x3f078ee9,
0x3f220c69, 0x3e0186dc, 0x3f1a3fc2, 0x3df6e6f8, 0x3ecb2ba4, 0x3f01f111,
0x3e914772, 0x3ec2fa9a, 0x3f5c0c34, 0x3e2ce3c4, 0x3e688ce0, 0x3e4c5c6c,
0x3e197710, 0x3f153d72, 0x3de9fe40, 0x3dcb82d8, 0x3e802ffa, 0x3f6f96d3,
0x3f111cab, 0x3eaa9140, 0x3f639e7d, 0x3e81c8f4, 0x3e73f658, 0x3f28144d,
0x3f79a8bf, 0x3ed0dc28, 0x3c997ee0, 0x3e81f87c, 0x3ecb3056, 0x3e185a84,
0x3ef76e98, 0x3f60b77c, 0x3d44ae80, 0x3f596e31, 0x3f7791bc, 0x3def36c8,
0x3f7ba9b4, 0x3e00e470, 0x3cbd5740, 0x3e83c666, 0x3ebe7e1c, 0x3f47be8d,
0x3e38b0f8, 0x3ddd5388, 0x3f296cd3, 0x3f6dcc4d, 0x3ede214a, 0x3f64cb7c,
0x3f6b83d7, 0x3db74d28, 0x3f05418c, 0x3f6c030d, 0x3e72fb40, 0x3f170005,
0x3efa00aa, 0x3efb578c, 0x3f1fea78, 0x3f4fce40, 0x3e488180, 0x3f700aa9,
0x3f5b50a2, 0x3ed435e6, 0x3e086648, 0x3f4b174c, 0x3c2939c0, 0x3eeae46a,
0x3f114a86, 0x3f214240, 0x3e4ac3f8, 0x3f5ae693, 0x3e39caa0, 0x3e9b7cb0,
0x3f207954, 0x3e679794, 0x3f35f930, 0x3f14fab7, 0x3eb63b42, 0x3f3ae46c,
0x3da38db0, 0x3e777d44, 0x3f5b4f5b, 0x3e2bd12c, 0x3ec4c640, 0x3efe0a04,
0x3ed14470, 0x3e8b93fc, 0x3da4eed8, 0x3f5fa53c, 0x3e1f86ec, 0x3f0f9ac9,
0x3efcee4a, 0x3f313a50, 0x3f071c15, 0x3f5d44c9, 0x3eec2ebe, 0x3f0a13cf,
0x3ed6d21e, 0x3f7b9ed5, 0x3eef0120, 0x3f571c5b, 0x3f6befec, 0x3f607b3f,
0x3f537d7e, 0x3d8b4a48, 0x3ec1ad4c, 0x3e1a8fa0, 0x3de922f8, 0x3f3bcb03,
0x3ea04e1c, 0x3f466874, 0x3f2c44c4, 0x3edfbbe4, 0x3f4fe4dd, 0x3f43503d,
0x3e99177a, 0x3f547e6b, 0x3df35af8, 0x3f52ffa7, 0x3dec08e0, 0x3f0e64c0,
0x3e0c8588, 0x3e171508, 0x3f3edf17, 0x3ef7e06e, 0x3f1732af, 0x3eb4c858,
0x3f2a4919, 0x3e552a04, 0x3e9c4d5a, 0x3eede6c6, 0x3f013ec9, 0x3e1b7d40,
0x3f69938c, 0x3f636881, 0x3f5664e3, 0x3e6e669c, 0x3dc7b5f0, 0x3edf5cc4,
0x3f16adc4, 0x3e9c66ce, 0x3ec0871e, 0x3f40173a, 0x3e79c3cc, 0x3d6829e0,
0x3e45a1a4, 0x3f44d2d6, 0x3f59ba11, 0x3f3873af, 0x3e866832, 0x3e5f5550,
0x3f3095ba, 0x3f0a1994, 0x3e2a01b0, 0x3ec81fee, 0x3df462b8, 0x3ec164cc,
0x3f111a8d, 0x3d5dc7e0, 0x3d3c9cb0, 0x3f5e78ac, 0x3f0956a1, 0x3f5f80b1,
0x3f512aba, 0x3f44f54a, 0x3f7dd77c, 0x3e52c9ec, 0x3dd14f80, 0x3b5cda00,
0x3effc556, 0x3f0df3ec, 0x3ea90e42, 0x3f42940f, 0x3f7734a2, 0x3f4b1e5e,
0x3f776aa0, 0x3e485e24, 0x3f011595, 0x3ef540ae, 0x3f0748a5, 0x3e46d958,
0x3f6ec052, 0x3f166e84, 0x3f5db818, 0x3e988ebe, 0x3df37ec8, 0x3d138ce0,
0x3e39f538, 0x3e9f145a, 0x3f72514e, 0x3f13ceab, 0x3e99a322, 0x3e262edc,
0x3f617b26, 0x3e5d84dc, 0x3f1addd1, 0x3d824e90, 0x3f6fc8f7, 0x3e26588c,
0x3ea986fa, 0x3eb074ae, 0x3d090e40, 0x3f688236, 0x3ef57cd8, 0x3f7bfe40,
0x3c3dcf80, 0x3dba81f8, 0x3e42aa74, 0x3ec9e218, 0x3f3acc72, 0x3f5d4b80,
0x3f7789e4, 0x3d112700, 0x3f4fab9f, 0x3e7883c0, 0x3f57ece7, 0x3f799bcd,
0x3e6c8aa0, 0x3efaa290, 0x3f2899ed, 0x3f0c8eff, 0x3e7a7e68, 0x3e6b8fd8,
0x3f145bda, 0x3e320ddc, 0x3f45866a, 0x3f49a803, 0x3f6dd94c, 0x3ca89800,
0x3f75a2de, 0x3f2b8b90, 0x3e9fe78a, 0x3f5f2f68, 0x3e718f34, 0x3ec2cddc,
0x3e84f64e, 0x3f569922, 0x3d141e50, 0x3f54a651, 0x3daa4730, 0x3f65103c,
0x3f5c03bb, 0x3ee2a65a, 0x3ed6f704, 0x3ed0de16, 0x3f4d955c, 0x3f420b48,
0x3f1e7c00, 0x3d9e5cf0, 0x3f332f34, 0x3f1d0d66, 0x3df5cb48, 0x3dd27670,
0x3f00f0a1, 0x3d2ef880, 0x3f595d1d, 0x3f757dff, 0x3e2434c8, 0x3eb6ee10,
0x3f3160b4, 0x3f4340a3, 0x3e2bf9d8, 0x3f44c362, 0x3df45d88, 0x3f03aaf1,
0x3e8aedb0, 0x3f0864ba, 0x3e82663e, 0x3f7f893c, 0x3f2c9e98, 0x3f7bef69,
0x3e92bda0, 0x3d8595e0, 0x3f6ac9e2, 0x3f162920, 0x3ef30786, 0x3e1fbfe4,
0x3d9d8840, 0x3e8fffb8, 0x3f690843, 0x3ca13020, 0x3f6e3e98, 0x3f56cb81,
0x3f699860, 0x3f2800b6, 0x3f7c2a76, 0x3f25f83f, 0x3e8f9a3a, 0x3f4b45c1,
0x3e941d82, 0x3f0a6617, 0x3f5b568c, 0x3ebefe56, 0x3f062dc4, 0x3e6c3608,
0x3f644910, 0x3f3d9385, 0x3f2248e3, 0x3f16e417, 0x3f422fc9, 0x3e02743c,
0x3f10b997, 0x3f787f63, 0x3eff6528, 0x3f31f201, 0x3ed63288, 0x3e4dc254,
0x3eef9eba, 0x3ea424f0, 0x3e033898, 0x3f483b3e, 0x3eee0e60, 0x3f54a3ae,
0x3f707107, 0x3eb1d8b8, 0x3ea3f662, 0x3e86a1f0, 0x3e3a55a0, 0x3c5b4080,
0x3f400a5d, 0x3ba1e600, 0x3c245d80, 0x3e03f8dc, 0x3f4bd525, 0x3f56d750,
0x3f465ca5, 0x3f753a24, 0x3f53932c, 0x3f753932, 0x3f3002fa, 0x3f7573e1,
0x3edc2d02, 0x3eea00b4, 0x3f4ef31c, 0x3f2061a2, 0x3ed42d84, 0x3cfc5a00,
0x3f3582b0, 0x3eb5e528, 0x3f39b688, 0x3f3d6023, 0x3e93d0ac, 0x3ed99934,
0x3ca1a700, 0x3ee8cbc6, 0x3ed281a4, 0x3f22db40, 0x3f553c37, 0x3f569447,
0x3ef9dd52, 0x3e9119ae, 0x3d9d6450, 0x3ed21636, 0x3d7c0c80, 0x3ebf4de0,
0x3eff1cdc, 0x3f0d44fd, 0x3f6fa051, 0x3d224dc0, 0x3f35bc2d, 0x3f36af8d,
0x3dabfb38, 0x3f5f0742, 0x3f1d0ba1, 0x3f72de8e, 0x3f595940, 0x3efda5c6,
0x3ed11820, 0x3f2caeca, 0x3f1b0ee9, 0x3f79485e, 0x3eaa2076, 0x3e8c2908,
0x3f2aab2a, 0x3f1fb784, 0x3ebed2b0, 0x3efaad12, 0x3f32d9c5, 0x3f5ca5aa,
0x3f13aac6, 0x3f33e900, 0x3e9c048a, 0x3ebeec30, 0x3de4ace8, 0x3ee1bf3e,
0x3eda1554, 0x3e4304a0, 0x3eccea32, 0x3c185780, 0x3a62c000, 0x3f129687,
0x3f2b6afb, 0x3ef8dd9a, 0x3efef2c8, 0x3efabb70, 0x3f31329b, 0x3eaaf4b2,
0x3e705b30, 0x3f359ace, 0x3d755780, 0x3e03656c, 0x3f5aee42, 0x3f601ac2,
0x3ecf12c2, 0x3f1542cb, 0x3f616c0f, 0x3f65fa63, 0x3eb25ac8, 0x3f3150f8,
0x3e4f0330, 0x3f7f2005, 0x3c8f2020, 0x3d7e11f0, 0x3ce24b60, 0x3f4f1d9b,
0x3e59a0f8, 0x3e95f830, 0x3e8259fc, 0x3e9af1a6, 0x3e9d9f6a, 0x3f2be0f4,
0x3e58d5cc, 0x3c3b3ec0, 0x3db825b0, 0x3f22da62, 0x3f33d2af, 0x3f2a8f48,
0x3eeebd58, 0x3f47df0d, 0x3e562c84, 0x3f389e47, 0x3f73c47b, 0x3f0f051d,
0x3e09338c, 0x3f09097a, 0x3f2636c7, 0x3f7fdd98, 0x3f43b245, 0x3e887714,
0x3ee2db9a, 0x3dc37750, 0x3e4248b8, 0x3f68cb57, 0x3e9e4ddc, 0x3ef87f02,
0x3eae45ac, 0x3ef16d84, 0x3e9b63ba, 0x3f15d482, 0x3f7686fc, 0x3f32374b,
0x3ecaa86e, 0x3d4eb820, 0x3e189168, 0x3f657fb2, 0x3f487ddb, 0x3e485900,
0x3e87ec2e, 0x3f4c0789, 0x3ef47170, 0x3f05f39d, 0x3f071df5, 0x3f7ef527,
0x3e44b92c, 0x3f741fe0, 0x3f72785a, 0x3e8d064a, 0x3f56187b, 0x3f6369db,
0x3f763261, 0x3f20d6c1, 0x3f361d8b, 0x3f01b064, 0x3e3e8e40, 0x3e477d3c,
0x3f30fba9, 0x3f7de0ed, 0x3f4e06a5, 0x3f120740, 0x3f0fc4b2, 0x3e124838,
0x3f6bd6fc, 0x3e524bb8, 0x3dd3ae18, 0x3f026d51, 0x3f0c2ae3, 0x3f1f40f5,
0x3ef1961c, 0x3ed25ff8, 0x3ea8a132, 0x3ef53324, 0x3efd0554, 0x3e04ffa0,
0x3f5e4d19, 0x3f15b827, 0x3f3a2ce0, 0x3e509920, 0x3f578a80, 0x3f35395e,
0x3f0202ea, 0x3ef45882, 0x3ee79d08, 0x3f311f86, 0x3f46114c, 0x3f527009,
0x3f03040e, 0x3f67a752, 0x3f636c40, 0x3f305276, 0x3f1d0907, 0x3f375100,
0x3d14c6c0, 0x3f6427f7, 0x3e173860, 0x3f4086d0, 0x3f251e5a, 0x3f08e8aa,
0x3f2c5678, 0x3ebfe734, 0x3ca7c0c0, 0x3ea0cb40, 0x3e240a40, 0x3e7771dc,
0x3f46629f, 0x3d2f2a90, 0x3de78420, 0x3f2098a2, 0x3f6036b8, 0x3d57cbd0,
0x3ec24cb8, 0x3f1abc4f, 0x3e7fa680, 0x3f531f0a, 0x3f008460, 0x3f25578b,
0x3d686ee0, 0x3f509a87, 0x3f59da6b, 0x3e893680, 0x3e8757ae, 0x3f5db794,
0x3e1a2738, 0x3f0f2303, 0x3ea6bab0, 0x3eb59ca6, 0x3f21173a, 0x3ef9bf4c,
0x3f58b072, 0x3ea3aede, 0x3e826bf6, 0x3f48612c, 0x3e9ae01e, 0x3ef38c3e,
0x3eb5d910, 0x3f231879, 0x3ef604e6, 0x3e5c26e8, 0x3f0ce0b9, 0x3e993b1e,
0x3f40f716, 0x3d03ba80, 0x3f46ef54, 0x3f16e89e, 0x3f0a2dab, 0x3f09c400,
0x3f17d07c, 0x3f6a60a4, 0x3b3eb100, 0x3ec323c4, 0x3f11e57d, 0x3e9e09d0,
0x3f6dc06e, 0x3f113b2d, 0x3ec51676, 0x3ea7c88e, 0x3f51a558, 0x3dc49d10,
0x3ef73706, 0x3f7c9820, 0x3ea1bf14, 0x3cd7c060, 0x3f520de6, 0x3f09a730,
0x3f1d1603, 0x3f6d2e4e, 0x3e36f550, 0x3f2b96c8, 0x3f65850c, 0x3ea4292e,
0x3e1bf0b4, 0x3f48ff2d, 0x3eb264c2, 0x3f04938b, 0x3e04a5a4, 0x3e719f1c,
0x3e2851e0, 0x3ea32a5c, 0x3ee7f486, 0x3d942018, 0x3edb51c2, 0x3d9b0df0,
0x3f5f80d5, 0x3e79d294, 0x3d6368e0, 0x3ed616ec, 0x3ec07510, 0x3eb2237c,
0x3e336bfc, 0x3f0857f4, 0x3f643c6a, 0x3f0a9bd0, 0x3ea97538, 0x3f0cc6ea,
0x3f0349d7, 0x3f7837e4, 0x3f28e3c4, 0x3f182155, 0x3c2022c0, 0x3dcfc128,
0x3cc5d4e0, 0x3edc633c, 0x3f049650, 0x3dc3f3d0, 0x3f554ea0, 0x3f14f358,
0x3e9f12d0, 0x3f5e568b, 0x3efee806, 0x3ee80fbe, 0x3efa549a, 0x3eb13138,
0x3edea102, 0x3ee5caa8, 0x3f14c6f6, 0x3f5f7be2, 0x3f6a3552, 0x3f3530d0,
0x3f4d53c4, 0x3f328d76, 0x3f762e45, 0x3d4a4180, 0x3f78be4d, 0x3f364eb2,
0x3f4f252a, 0x3e675ea4, 0x3f18c5b8, 0x3e16f590, 0x3f2c6606, 0x3ecca7fe,
0x3f2c1590, 0x3f03fae4, 0x3f046c04, 0x3f5054bf, 0x3eb195a4, 0x3cbb6940,
0x3ec22620, 0x3f6e4ef7, 0x3ef9a4e6, 0x3e97d63e, 0x3f72afeb, 0x3ec5c2dc,
0x3f3718ee, 0x3dbd3cc0, 0x3e243758, 0x3e83a3f2, 0x3f4afacc, 0x3db27ea8,
0x3f20dfa1, 0x3e8267e0, 0x3bf90e00, 0x3e3e79ec, 0x3f4af74f, 0x3f0b254d,
0x3f2904d4, 0x3ed25a54, 0x3ecf2002, 0x3f216188, 0x3ed106f4, 0x3ce40b80,
0x3f128659, 0x3d0af280, 0x3d42f880, 0x3f0c2351, 0x3f1b1295, 0x3dcd9fa0,
0x3eef4b00, 0x3d8d4058, 0x3f34d4e6, 0x3f022e1e, 0x3dc43d48, 0x3f4a4526,
0x3f77f9c3, 0x3f027a21, 0x3e61a9a0, 0x3f28a426, 0x3f6a5e14, 0x3e7ed790,
0x3f595100, 0x3ee07650, 0x3f103dca, 0x3f265efc, 0x3f6d0eda, 0x3f208d06,
0x3eee460a, 0x3f53c6ad, 0x3f35f4e4, 0x3e491a8c, 0x3edfa122, 0x3f6ae4c8,
0x3c04b240, 0x3ee4a666, 0x3e640d3c, 0x3f057a38, 0x3ef2d08a, 0x3ddb1d30,
0x3f1ef9bd, 0x3f59b5f6, 0x3ec9467e, 0x3b9f4e80, 0x3ef417b8, 0x3d40d7b0,
0x3f166c75, 0x3f48c414, 0x3dc185e0, 0x3dcfb480, 0x3f29a020, 0x3dea2738,
0x3f3c5bc2, 0x3f63d794, 0x3ecdaf12, 0x3f4092f3, 0x3f04a597, 0x3f3bf424,
0x3cf68a80, 0x3ecaf7c4, 0x3f680a46, 0x3f0cd9b3, 0x3d9a6170, 0x3f74571d,
0x3f189457, 0x3f0fb14c, 0x3f32a257, 0x3e37c8a8, 0x3ee3f834, 0x3f1b9abb,
0x3f6c2eed, 0x3d81d488, 0x3f576f7a, 0x3e64d798, 0x3ea4c918, 0x3e0465f0,
0x3f10ee31, 0x3f49a007, 0x3e672bd0, 0x3f5ac69f, 0x3f4e398c, 0x3e66ea30,
0x3e819114, 0x3f35d445, 0x3d5f85c0, 0x3d9f6d18, 0x3e27ec0c, 0x3e95349e,
0x3e868136, 0x3f1a4348, 0x3e8b20d6, 0x3e4497f0, 0x3de0e670, 0x3c9df120,
0x3f266f3a, 0x3ec92d5e, 0x3e95f826, 0x3f6195ce, 0x3f6561ce, 0x3f32d7f1,
0x3f38602a, 0x3f744f23, 0x3f4c2594, 0x3dcf7210, 0x3db90100, 0x3f5f9ba4,
0x3de93160, 0x3db1ed90, 0x3e77e55c, 0x3e593b08, 0x3f3bd60c, 0x3f6a2099,
0x3f71fcfd, 0x3f432777, 0x3f526fea, 0x3f496542, 0x3ed3a442, 0x3d34b0d0,
0x3e7403f4, 0x3f6fef7c, 0x3dee3f40, 0x3dee2ba0, 0x3ce9a380, 0x3f2fbeb1,
0x3f1b8226, 0x3f2ce7bd, 0x3f242d28, 0x3f1f15bb, 0x3d15a8d0, 0x3ecf248c,
0x3f523e8c, 0x3f68f26e, 0x3f111dcd, 0x3f644868, 0x3f045a7c, 0x3cd52e40,
0x3e090228, 0x3e468aac, 0x3f5683b3, 0x3f1012b0, 0x3eea4c98, 0x3f4fa265,
0x3f599461, 0x3f72ce24, 0x3f54457a, 0x3e73f554, 0x3f31811f, 0x3f452b9e,
0x3f00ad20, 0x3f5e239e, 0x3dea9d98, 0x3e30e688, 0x3f1663cf, 0x3ef10362,
0x3eaf5b1e, 0x3e947300, 0x3f71bbe4, 0x3d271680, 0x3e7f71b0, 0x3f078c13,
0x3e3a54c0, 0x3f6d6c9a, 0x3f1f63db, 0x3f446f5a, 0x3f6d72bf, 0x3e371870,
0x3f66f42f, 0x3e8be2e4, 0x3f7bf867, 0x3ef936b6, 0x3eeeecae, 0x3f4ce3cc,
0x3f49ffd9, 0x3d9e5af8, 0x3f351566, 0x3efacb16, 0x3f19305c, 0x3eafecf6,
0x3ec8e93c, 0x3f77e805, 0x3d94bec0, 0x3e459a4c, 0x3e9cb5b2, 0x3eff2850,
0x3f1f0616, 0x3f27f7ac, 0x3f2049f2, 0x3de85d90, 0x3ec46a54, 0x3f6a5bf4,
0x3f22beb3, 0x3f476ea3, 0x3ef531ec, 0x3ecf4648, 0x3e63a008, 0x3e500980,
0x3dc9a988, 0x3f1f2875, 0x3f3d6ff0, 0x3f26476b, 0x3e4c7368, 0x3f79d8f9,
0x3f7163a8, 0x3f127746, 0x3e9acad6, 0x3f50416f, 0x3e0a3764, 0x3f407361,
0x3d863c60, 0x3f2aa2d2, 0x3efaecb4, 0x3f5e64d4, 0x3f3c2367, 0x3e456730,
0x3f2f91cc, 0x3f2f9a98, 0x3dce6770, 0x3ee98ae8, 0x3f4a12f8, 0x3e28fcdc,
0x3f3768b4, 0x3e2b0850, 0x3ed5462a, 0x3c33c800, 0x3f34576e, 0x3d99d928,
0x3efc18ee, 0x3f53b5cb, 0x3e6ff138, 0x3f593d23, 0x3bbd5400, 0x3f4e8810,
0x3e798830, 0x3f088dea, 0x3f25f6f3, 0x3e9a0dd6, 0x3f08c3ec, 0x3f0472d2,
0x3da2d0e0, 0x3f379feb, 0x3dce43b8, 0x3f0c2f75, 0x3cf2d820, 0x3edabe3a,
0x3f5492ed, 0x3d8458a0, 0x3f0159f9, 0x3eceb838, 0x3e99b766, 0x3f616097,
0x3ef170a4, 0x3e42ccb0, 0x3f0f828c, 0x3e5f56e4, 0x3f03cf3d, 0x3ecd7f68,
0x3e90cd66, 0x3e8d4228, 0x3f0370cc, 0x3ea8e362, 0x3f46f665, 0x3f310e94,
0x3ea651b8, 0x3db9f2e8, 0x3f7c709c, 0x3ed4b2f4, 0x3e9b356e, 0x3f16e8ff,
0x3f0de0ab, 0x3f63e075, 0x3d8be170, 0x3dd62588, 0x3f4a9656, 0x3eeaa6ae,
0x3f639f2b, 0x3f140ab8, 0x3f59faa2, 0x3e492ca8, 0x3f2e0ecf, 0x3e6a5404,
0x3d5142a0, 0x3e095f38, 0x3b06cb00, 0x3edeb7aa, 0x3f0248c7, 0x3e2a5bcc,
0x3f0fc87b, 0x3e43e100, 0x3e9552d4, 0x3f2c7ba7, 0x3f7ecf7c, 0x3d33aab0,
0x3d685c60, 0x3ec53d5e, 0x3e8d6620, 0x3f7f0ab4, 0x3ec0bdcc, 0x3f63fd18,
0x3dab59b0, 0x3f6bea4a, 0x3e7734a4, 0x3f529e4f, 0x3f1be6bb, 0x3f7435f7,
0x3e485f64, 0x3e966afc, 0x3f2923cd, 0x3f55761b, 0x3e93a548, 0x3f7e189d,
0x3f5e5618, 0x3f738a75, 0x3e96944e, 0x3ef4d644, 0x3f03db36, 0x3f3df7a2,
0x3ece0e08, 0x3f4624f6, 0x3eb398aa, 0x3f1bc3bf, 0x3f287e57, 0x3ec5f0da,
0x3f0f675e, 0x3e0fd6c8, 0x3f662cef, 0x3ea98894, 0x3b4d8000, 0x3e4c3fa4,
0x3f31763c, 0x3d4e91b0, 0x3f263f88, 0x3e26e93c, 0x3f5a3217, 0x3ba7b900,
0x3f1123cc, 0x3f2486d6, 0x3f251a39, 0x3f624d10, 0x3f3c0ac3, 0x3e9d338c,
0x3f30a70e, 0x3f577929, 0x3eb9511a, 0x3f3b7363, 0x3e9a3b0e, 0x3e1fc188,
0x3e77b2d8, 0x3ecd0964, 0x3f246d1d, 0x3f6e821f, 0x3e3afa50, 0x3eb97996,
0x3f228232, 0x3d7b7070, 0x3e91a548, 0x3f3ffb2e, 0x3e8a35f0, 0x3e173980,
0x3f63a28f, 0x3f2fa0c8, 0x3f1f55de, 0x3e4c82ac, 0x3f345672, 0x3e10f60c,
0x3c5c19c0, 0x3eaaedda, 0x3d8cdfb8, 0x3d408450, 0x3bfdcb00, 0x3ef9b4e4,
0x3f45e2b0, 0x3e86b840, 0x3d76dad0, 0x3f7e9142, 0x3e8928e4, 0x3e19d144,
0x3ec42918, 0x3f0ae221, 0x3f43a419, 0x3e0d8408, 0x3e1dc598, 0x3f0e0ec8,
0x3f492f98, 0x3f5fb339, 0x3f465f0e, 0x3eb6bf26, 0x3f715b5e, 0x3ef79e58,
0x3ec3e3be, 0x3d4e4d60, 0x3f50b567, 0x3e6be678, 0x3f463e1b, 0x3f6a34da,
0x3f1f2dd1, 0x3f1ccc19, 0x3e87800c, 0x3f305a79, 0x3b9ad580, 0x3f46943a,
0x3def1230, 0x3ef3bcb0, 0x3ed8abc8, 0x3f560325, 0x3da2e3d8, 0x3f370cca,
0x3eba530e, 0x3e20a1f8, 0x3e45f97c, 0x3edb8f50, 0x3e82c882, 0x3e29cd40,
0x3e88ff12, 0x3f6b4a53, 0x3e5c3df8, 0x3f131101, 0x3f64681b, 0x3f1a765a,
0x3dec0a60, 0x3ed95430, 0x3efe0a50, 0x3ef370ce, 0x3f5e58df, 0x3e5e6cbc,
0x3f21e634, 0x3f7a707f, 0x3ece85b2, 0x3e86eb6a, 0x3f454746, 0x3f4c0c0f,
0x3f774311, 0x3e13a5ac, 0x3f4b676d, 0x3f7c2b04, 0x3f2383a7, 0x3f75ea82,
0x3e02b694, 0x3f07cbba, 0x3f66b46f, 0x3f3cba22, 0x3de50fa8, 0x3dda54d0,
0x3f7e6bb7, 0x3ebc283e, 0x3f669e20, 0x3ebcc22a, 0x3ed48068, 0x3f6e57ba,
0x3f0e82d9, 0x3f1adb40, 0x3eaf97b0, 0x3e25b7a8, 0x3f6fa985, 0x3e443458,
0x3ca10900, 0x3c9317c0, 0x3f0260a1, 0x3e84a166, 0x3e4dc148, 0x3e958a98,
0x3eb75efc, 0x3f261269, 0x3ea3b3de, 0x3ebdad98, 0x3eb79c90, 0x3e5dd240,
0x3f53aa15, 0x3e6f350c, 0x3f17b9aa, 0x3f72c6e1, 0x3f554366, 0x3f296595,
0x3d9c2888, 0x3f47af71, 0x3cae2320, 0x3e932fec, 0x3ebe6b28, 0x3d7dd930,
0x3ed79088, 0x3e91fb4a, 0x3ece1b38, 0x3f0f2bb4, 0x3e5c4164, 0x3e458214,
0x3f78239d, 0x3efddbfa, 0x3ee7f49a, 0x3f4cdf09, 0x3e337188, 0x3f1466b1,
0x3eb1434a, 0x3de78d48, 0x3f1cc3a3, 0x3f00e0e5, 0x3e55763c, 0x3e82a3f0,
0x3f4b8499, 0x3e3719d8, 0x3f4211a3, 0x3f3aff13, 0x3d1ca000, 0x3f668513,
0x3ecd7990, 0x3f3ce6b0, 0x3ee89194, 0x3e825ce2, 0x3ee31ba4, 0x3ee91046,
0x3edd47ce, 0x3f723c6c, 0x3e860fe4, 0x3f024c5f, 0x3ef94040, 0x3f3194b7,
0x3e5c802c, 0x3f6aad09, 0x3f72d818, 0x3f0bfbed, 0x3f511cb4, 0x3f365213,
0x3f5faa57, 0x3db5dde8, 0x3ec7a944, 0x3f4b6844, 0x3e7e6b0c, 0x3e36d818,
0x3d70c5b0, 0x3eab8cfe, 0x3ed05dce, 0x3f691005, 0x3eb7cdfa, 0x3f6ab9f6,
0x3e2ebe40, 0x3f57e835, 0x3e5a4adc, 0x3e30116c, 0x3e18b34c, 0x3f536553,
0x3d58bc80, 0x3f326754, 0x3f3c8751, 0x3ec590cc, 0x3f7e3076, 0x3f2e3b1e,
0x3f10f414, 0x3f4786ad, 0x3f481efc, 0x3e4866d4, 0x3f015d2f, 0x3d2f6710,
0x3e4b5484, 0x3f740318, 0x3f46091e, 0x3f73b3d1, 0x3ca8cfa0, 0x3f7c01eb,
0x3f747219, 0x3eb9ba4c, 0x3f1276f4, 0x3f6c1f4e, 0x3f3aab14, 0x3e2c8170,
0x3f35994e, 0x3eaa01ee, 0x3e1cebb8, 0x3f611969, 0x3f4643db, 0x3f6a617f,
0x3f514489, 0x3f0a4a23, 0x3f764ae4, 0x3f2c6748, 0x3f56c6ac, 0x3f763b87,
0x3f0e1b99, 0x3d3e57c0, 0x3f56c4bb, 0x3eb551a2, 0x3f7fa8f0, 0x3efc0212,
0x3f678ac4, 0x3ec8cc8a, 0x3f4f855e, 0x3f2974d0, 0x3ecaa5ce, 0x3f37e2dd,
0x3e6ff598, 0x3f7933a6, 0x3f372480, 0x3db7be20, 0x3f1bee0e, 0x3e022cf4,
0x3eaa78c2, 0x3eeae1c6, 0x3f14aa90, 0x3efd21da, 0x3f611664, 0x3f56e65e,
0x3f6c08f4, 0x3ea216c8, 0x3f433e46, 0x3e5590a4, 0x3f1781a5, 0x3f099b7a,
0x3f1ae27a, 0x3f4fd0bb, 0x3ef14684, 0x3dd89030, 0x3f4f324a, 0x3eeb0542,
0x3edb9f22, 0x3e93ebb0, 0x3e66f1e0, 0x3f6970f1, 0x3e70e300, 0x3f3895e7,
0x3f597387, 0x3f0a9b1b, 0x3f3cc46b, 0x3f077686, 0x3ebd35bc, 0x3ef96736,
0x3dd0c998, 0x3dfd4988, 0x3f212a2b, 0x3eb7a320, 0x3b17b100, 0x3eb3c42a,
0x3f1582b1, 0x3f1c11b3, 0x3e8913fa, 0x3dbdeb30, 0x3df8fcb8, 0x3dc46af0,
0x3f352040, 0x3f589011, 0x3edc808c, 0x3e80d37c, 0x3d9236b0, 0x3e9f2e9e,
0x3f4d1f56, 0x3f08ccde, 0x3f34f622, 0x3e3d12cc, 0x3e223134, 0x3f44ff79,
0x3e6abc74, 0x3f144753, 0x3da02ee8, 0x3d764cb0, 0x3f2a681d, 0x3f3491f1,
0x3f0ba337, 0x3f6bd6ac, 0x3f12be1f, 0x3eaa3680, 0x3e553a74, 0x3f3d6fe9,
0x3f2611e3, 0x3f0463db, 0x3f1b3942, 0x3f5e14fd, 0x3c59a740, 0x3e2dd320,
0x3e8016e8, 0x3e9dd2ea, 0x3c22b540, 0x3d66d830, 0x3d0a2720, 0x3c96bce0,
0x3e843762, 0x3f61c34c, 0x3f127569, 0x3f64d6da, 0x3db25a28, 0x3ed2912e,
0x3f0f72b5, 0x3e0d5b28, 0x3dddbfd0, 0x3f21c60d, 0x3f23f63b, 0x3d6dda30,
0x3e85953a, 0x3f3c5bc9, 0x3d538620, 0x3f47704f, 0x3ea30326, 0x3f181936,
0x3f7fd50c, 0x3f530489, 0x3f7060a9, 0x3ee2e3fc, 0x3f044180, 0x3f3b70f7,
0x3f1f4a58, 0x3efd69da, 0x3f41cd24, 0x3e86283c, 0x3f3497e9, 0x3f3a71b3,
0x3eaa79c8, 0x3c131f00, 0x3f5ba1a1, 0x3f398f4f, 0x3f681381, 0x3de61d90,
0x3f1e5aae, 0x3f398ee4, 0x3f720039, 0x3f5d9df1, 0x3e442bbc, 0x3f1cc518,
0x3e920260, 0x3f511177, 0x3e6c3b04, 0x3f242e0c, 0x3f5c1cd5, 0x3f4eed1a,
0x3eec875c, 0x3ee452f6, 0x3e92b064, 0x3f3dfa7f, 0x3f3a4e23, 0x3ebfbc38,
0x3f22a1c9, 0x3f560a99, 0x3efff830, 0x3f7ff95d, 0x3efeceae, 0x3f78704e,
0x3f32e8de, 0x3f323d9c, 0x3f336afa, 0x3f178a2a, 0x3ea009d8, 0x3f483fce,
0x3f153090, 0x3f1c58d3, 0x3f0d2b3b, 0x3e809562, 0x3f255d6e, 0x3e2041ec,
0x3efff0f4, 0x3f7673c3, 0x3ee88ec8, 0x3f44190e, 0x3f0322d9, 0x3f1d55d4,
0x3e948cf0, 0x3e8534be, 0x3f7fe700, 0x3e69c60c, 0x3f2e0ebe, 0x3f6c03ae,
0x3d64f890, 0x3f146a56, 0x3e831b86, 0x3f6c3c3a, 0x3da9b8e0, 0x3eab7e4c,
0x3f603bf8, 0x3dfd1f68, 0x3f3a7662, 0x3f14a931, 0x3e98cbac, 0x3f2c15c3,
0x3ec342b8, 0x3f6f735d, 0x3f34e540, 0x3f3af08a, 0x3ee0eb1a, 0x3f0e338f,
0x3c8e3bc0, 0x3f28ebe0, 0x3e84250c, 0x3f13aa39, 0x3c61c100, 0x3f320917,
0x3f5e4614, 0x3f71c360, 0x3f48d744, 0x3f6cc4b8, 0x3e13ccb4, 0x3edf03c8,
0x3f670c60, 0x3dd16130, 0x3f583099, 0x3f4486fa, 0x3f24565a, 0x3df547f0,
0x3ec30c1e, 0x3f6f621d, 0x3ecd683c, 0x3f57bbd0, 0x3d2d3680, 0x3f10a4bd,
0x3f01f7f2, 0x3f501432, 0x3e25a45c, 0x3e864886, 0x3f48fe1f, 0x3f41e4d8,
0x3e23be98, 0x3f6f746f, 0x3f4ea9ed, 0x3f16a40d, 0x3f75d25b, 0x3eae07a2,
0x3f51766c, 0x3e8e9d3c, 0x3f60d151, 0x3f57ec87, 0x3efa1856, 0x3f2d3b5e,
0x3dafb860, 0x3db85348, 0x3f30155e, 0x3f59d2f3, 0x3ed13064, 0x3f029fd1,
0x3f5fc472, 0x3ed3041c, 0x3f0fcfa3, 0x3eaccfa4, 0x3ee427fa, 0x3e76d244,
0x3f2b864f, 0x3f654967, 0x3f3e733a, 0x3f60df6f, 0x3eac9918, 0x3e3524e8,
0x3f6cde8b, 0x3f7d9825, 0x3f2757ca, 0x3d37ee70, 0x3f7acfa9, 0x3e77aa98,
0x3e755d2c, 0x3ee84108, 0x3eb10a4e, 0x3f624610, 0x3e614f68, 0x3f5405b3,
0x3eee393a, 0x3f0e8e69, 0x3e06d1f8, 0x3e999c3a, 0x3df80670, 0x3f0ab050,
0x3efe2d26, 0x3ee8bc76, 0x3e9029bc, 0x3e1f1118, 0x3e954f74, 0x3f36d02f,
0x3f2f7c67, 0x3eeb9028, 0x3f2d43e7, 0x3f6b49e1, 0x3ee2bb78, 0x3f1bc70a,
0x3f21895a, 0x3f15c497, 0x3e98612c, 0x3efc8c6a, 0x3e54f3fc, 0x3e77f614,
0x3f1cf80a, 0x3f3beb10, 0x3f30d914, 0x3e9be6bc, 0x3ebf606c, 0x3f01c4c2,
0x3f7f623c, 0x3ec4ca68, 0x3e7636d8, 0x3f436c2b, 0x3eb0c2a2, 0x3efa261c,
0x3f200bea, 0x3de9b0b0, 0x3dfcff90, 0x3f5b7881, 0x3f693652, 0x3f2a5ae5,
0x3e52a3c8, 0x3ef8840c, 0x3f5ac69c, 0x3f0f4988, 0x3e8f3aae, 0x3d385340,
0x3e85b5c2, 0x3f46a618, 0x3e3c46b4, 0x3e113e64, 0x3f0b9306, 0x3e8a0b80,
0x3f48e8e7, 0x3dc768d8, 0x3f272abb, 0x3e8d5b7c, 0x3ef5aa78, 0x3f50361d,
0x3f7769dc, 0x3f77e957, 0x3f6c763e, 0x3e66579c, 0x3ee5f23a, 0x3d677940,
0x3d2f6c00, 0x3f2daff7, 0x3f077be5, 0x3f2d9ce3, 0x3e0924f0, 0x3f16ebc5,
0x3dbe7a68, 0x3f3c5b2a, 0x3f4ffcb0, 0x3cfa0760, 0x3f2f6364, 0x3f46d903,
0x3dcd8e50, 0x3f3df586, 0x3ebc4bca, 0x3e6d0a14, 0x3f52aa2c, 0x3f3573f4,
0x3f633133, 0x3f58229d, 0x3d8cda18, 0x3c9352c0, 0x3efa765c, 0x3f33bfca,
0x3e46c45c, 0x3f703e85, 0x3e8592e6, 0x3efd79f6, 0x3ec3d824, 0x3f2be1ce,
0x3f47fee3, 0x3e8f1ba0, 0x3eb138f2, 0x3f740733, 0x3f2d475b, 0x3e862a0a,
0x3e9a2daa, 0x3f21946d, 0x3e564fa8, 0x3f291afc, 0x3f04b722, 0x3f127996,
0x3f4c846a, 0x3f4ced47, 0x3e4c3bd4, 0x3f0d3585, 0x3ee064ce, 0x3f3e2443,
0x3ef5def4, 0x3ba20000, 0x3f383b0f, 0x3f3b2372, 0x3f23e5bc, 0x3e13ac30,
0x3eaf03fa, 0x3f4a22ba, 0x3f76bc54, 0x3e389b38, 0x3f410ba1, 0x3f6a336c,
0x3f5a8d80, 0x3ca3a940, 0x3f51a407, 0x3ec6d6c8, 0x3f3bd982, 0x3e7f5800,
0x3f7c3988, 0x3e31fcb0, 0x3f2b0471, 0x3f63f395, 0x3e71851c, 0x3f0048a0,
0x3e97dc4c, 0x3e53adf8, 0x3dd20ed0, 0x3da668a0, 0x3f2014d9, 0x3eddf04c,
0x3f5ccc7b, 0x3e23011c, 0x3ed57f9c, 0x3c3ae140, 0x3e945d80, 0x3efaecf8,
0x3dbd3af8, 0x3e1016b4, 0x3f2f15e9, 0x3eabbafc, 0x3f195dde, 0x3f7f98c4,
0x3df5f4d8, 0x3f4b85a3, 0x3f709498, 0x3d547530, 0x3ea4f138, 0x3f008bd8,
0x3f0238e0, 0x3d8bf808, 0x3e82c1e4, 0x3ec9f61c, 0x3eface24, 0x3f667ede,
0x3f6be85d, 0x3e83a310, 0x3f4bd848, 0x3f54fa2e, 0x3f2297dc, 0x3e0badd0,
0x3f05ecad, 0x3d0391b0, 0x3f498876, 0x3efd1096, 0x3ef6071e, 0x3ed7f250,
0x3f5fa873, 0x3e9abed6, 0x3ec8276a, 0x3f21602e, 0x3da602c8, 0x3c880fc0,
0x3f4c880d, 0x3eff0c3a, 0x3d2a8100, 0x3f116087, 0x3e0df66c, 0x3e5ea110,
0x3f500cd7, 0x3f2b0de5, 0x3f3b00ba, 0x3e201a14, 0x3eb102ba, 0x3f65e38a,
0x3f2cae99, 0x3e77fc98, 0x3f41727f, 0x3e7ebe48, 0x3e48ea6c, 0x3e5d3b70,
0x3c8b90a0, 0x3f16a40f, 0x3dd3a640, 0x3f0efb14, 0x3ce40dc0, 0x3f4b2b1d,
0x3df14440, 0x3f3e342b, 0x3f63b8bc, 0x3f6fed88, 0x3deaff38, 0x3e7f4a40,
0x3db4b090, 0x3f67b32b, 0x3ef0d598, 0x3e091444, 0x3e93bdfa, 0x3f6921d4,
0x3f1deac9, 0x3ece6476, 0x3dff8118, 0x3f543339, 0x3e3c701c, 0x3ee65820,
0x3f142426, 0x3e246830, 0x3f0b1ba5, 0x3e46c48c, 0x3f3dd169, 0x3ec4c396,
0x3f11ddf2, 0x3f4e810c, 0x3f47d649, 0x3d898da8, 0x3f352aaa, 0x3f774f54,
0x3cf165e0, 0x3ea3cf54, 0x3f04a59b, 0x3f1823fc, 0x3c866480, 0x3ed64912,
0x3f21c63d, 0x3cb0e280, 0x3de75608, 0x3f5b635f, 0x3eef44a4, 0x3f5bf695,
0x3e5004ec, 0x3e9e49ac, 0x3e6cb4f0, 0x3f13e360, 0x3ee84642, 0x3f170a7e,
0x3c98b440, 0x3ed8434e, 0x3f4b6a8d, 0x3f4e1c12, 0x3f017445, 0x3f602c52,
0x3f1d98ab, 0x3f1c39e0, 0x3f36b91e, 0x3f099460, 0x3f3f473c, 0x3eaa309c,
0x3ef40028, 0x3f614cb0, 0x3f51f29d, 0x3edc5382, 0x3e9817ba, 0x3eebfaee,
0x3dfe9798, 0x3f513324, 0x3f2f18bc, 0x3f1eaee4, 0x3f7ec9cf, 0x3e9a5f6e,
0x3f02c823, 0x3e926c4e, 0x3f52e716, 0x3f1f4e7f, 0x3f4699de, 0x3e5e306c,
0x3f37202a, 0x3f67b6c3, 0x3ec800ec, 0x3ebc5468, 0x3c4cb380, 0x3d8713f8,
0x3caa03c0, 0x3f2f2036, 0x3f3b96be, 0x3f1da0bb, 0x3f162c47, 0x3edf0b52,
0x3ef739c6, 0x3f20fc82, 0x3f23c33f, 0x3f07db38, 0x3f3c6951, 0x3f7d9df3,
0x3f6716d5, 0x3e0a1e28, 0x3f1643ac, 0x3d48e910, 0x3ef0e90c, 0x3f12d5c9,
0x3f6e4c6f, 0x3ed4912a, 0x3f722240, 0x3f5df927, 0x3f2566e8, 0x3f2ec523,
0x3e969378, 0x3f2db7d4, 0x3e9bedd0, 0x3d083d30, 0x3eeabfbc, 0x3ec5ff7e,
0x3ea059f6, 0x3eb4bbae, 0x3f616a75, 0x3f28ff1f, 0x3f413800, 0x3ee24bee,
0x3e72c494, 0x3f363b25, 0x3e9bda30, 0x3de09550, 0x3e1883f4, 0x3f5a9853,
0x3df29198, 0x3f3bc2f9, 0x3f66ab27, 0x3ece770e, 0x3f0fa0ea, 0x3d6d73a0,
0x3e83c22e, 0x3e1ad670, 0x3ed5d654, 0x3f3ab35e, 0x3f791af7, 0x3ef7a014,
0x3f53a835, 0x3f67b69e, 0x3f605bb0, 0x3f2e9544, 0x3f4bc5da, 0x3ddfb1a8,
0x3f5a50e0, 0x3f0d3923, 0x3eac19d4, 0x3f614842, 0x3ccd6160, 0x3e63bb00,
0x3eac9742, 0x3eeef5e6, 0x3f0a7bb0, 0x3ea15e76, 0x3f41d738, 0x3f5d7a96,
0x3f102db6, 0x3eeca428, 0x3f7019ee, 0x3f274fc1, 0x3f106608, 0x3f4187e6,
0x3e4cb700, 0x3eec45e2, 0x3ee9acc4, 0x3f393026, 0x3eb3c5b8, 0x3e55a6ac,
0x3ed7f8e8, 0x3ee304cc, 0x3eeaf74e, 0x3d190800, 0x3f518a7e, 0x3f342f7e,
0x3f51e039, 0x3eaa4dde, 0x3e944224, 0x3e5737d8, 0x3f50cb33, 0x3f4ddbc8,
0x3f4fbe94, 0x3f12d4b3, 0x3f5bbdde, 0x3f426e0c, 0x3f44b4f7, 0x3e54832c,
0x3ed3b7a6, 0x3ebbaea0, 0x3f43590d, 0x3f16857b, 0x3e2dd944, 0x3f3a245d,
0x3dcede78, 0x3f67a2d8, 0x3ec786ea, 0x3ed5cd6a, 0x3e0cc294, 0x3db674e0,
0x3ef709a2, 0x3ea8623e, 0x3ebbbcfa, 0x3ebd075c, 0x3f12e024, 0x3ed05d68,
0x3e821974, 0x3d28b000, 0x3f723c1b, 0x3f32a521, 0x3f6c2ffb, 0x3f145080,
0x3f1e5875, 0x3f7ee410, 0x3f4da8b3, 0x3f59e2d9, 0x3e767498, 0x3eec5f82,
0x3f74ecf3, 0x3f4b9316, 0x3f67ba98, 0x3f5ef6a7, 0x3f5726e4, 0x3e8e00a2,
0x3f031959, 0x3e1380ac, 0x3e722d8c, 0x3f483aea, 0x3f01ca43, 0x3e207030,
0x3ecf009e, 0x3f6513de, 0x3e9f542e, 0x3f23e5d6, 0x3ec9f980, 0x3ec0a858,
0x3edbf588, 0x3d846290, 0x3e3065c8, 0x3ddfe360, 0x3e5e9414, 0x3e6a7f7c,
0x3ef7a910, 0x3e809ff6, 0x3e0634e0, 0x3f49b10d, 0x3f4c9395, 0x3e257acc,
0x3f1da635, 0x3e76b5c8, 0x3ee3c3ae, 0x3e903e0e, 0x3f2ecd34, 0x3f23b2e3,
0x3efc3bae, 0x3f153f7e, 0x3ef5b9cc, 0x3f492e81, 0x3d7e3ba0, 0x3f27bf69,
0x3d352820, 0x3f22bf1e, 0x3e7e2248, 0x3e19b688, 0x3f59b74c, 0x3dcf0850,
0x3f751ea5, 0x3e9784f2, 0x3eb03d4a, 0x3f29ea13, 0x3e96fda6, 0x3e973df2,
0x3e55e8b8, 0x3ea99e82, 0x3f0ab311, 0x3ef8b48e, 0x3d5c8aa0, 0x3e5afaf0,
0x3f7d1f59, 0x3e6d6098, 0x3ecdbb70, 0x3f5caa96, 0x3f09f986, 0x3e91acd2,
0x3eef4158, 0x3f621726, 0x3de9ad60, 0x3f0dfa1f, 0x3f648800, 0x3f0b34ac,
0x3ef10064, 0x3f3e5453, 0x3f6945f9, 0x3f3f1ca2, 0x3f728858, 0x3e4e1a4c,
0x3f3ee4a5, 0x3f0b496a, 0x3eaa8d1c, 0x3d0ac270, 0x3e65b030, 0x3f7c843d,
0x3e8f817e, 0x3f06f535, 0x3f15fb72, 0x3f269adc, 0x3f212037, 0x3f2645b2,
0x3f0132d6, 0x3f07f69b, 0x3f3d3e7b, 0x3c08a240, 0x3f2fcfda, 0x3f319dba,
0x3ea14dac, 0x3ec52376, 0x3d295380, 0x3d2ec5e0, 0x3f42613f, 0x3f240e75,
0x3e1a82d4, 0x3e8c0a1c, 0x3eded670, 0x3b6f8700, 0x3e1a18ec, 0x3e2e1cf0,
0x3f50749b, 0x3f7344d2, 0x3decf740, 0x3e0ea45c, 0x3d6bd740, 0x3efa76f4,
0x3f5d8683, 0x3e9bfdda, 0x3f6f9a3d, 0x3e9544ce, 0x3e499b28, 0x3d896360,
0x3f39f9c7, 0x3e4494b8, 0x3e8ad98e, 0x3f210a7b, 0x3edff444, 0x3e6b664c,
0x3f7d3c8a, 0x3ee675e8, 0x3f577b63, 0x3f10e622, 0x3e6e8158, 0x3f36f6e6,
0x3f40c951, 0x3f276d71, 0x3f081dc8, 0x3eb4944c, 0x3f551eb2, 0x3de54648,
0x3f6c21e5, 0x3d94f1a8, 0x3e931b0c, 0x3f2f6149, 0x3cf02720, 0x3f702f0d,
0x3ec4e20c, 0x3f22fc7d, 0x3f338426, 0x3d186c70, 0x3f52b64e, 0x3f35da4e,
0x3f600a27, 0x3ef3cd62, 0x3dceb1b8, 0x3f73e064, 0x3ebb3e40, 0x3f080b45,
0x3f205635, 0x3e8903a2, 0x3ea2dd12, 0x3e6e3c5c, 0x3f606cdb, 0x3f03a4f3,
0x3effbf00, 0x3f101011, 0x3ee0c556, 0x3e9500c8, 0x3cc977e0, 0x3ea7abda,
0x3f5f2f83, 0x3f0cc395, 0x3f7b7b06, 0x3f046580, 0x3ee6f630, 0x3f1598f3,
0x3de3f398, 0x3c9258a0, 0x3d82fcf8, 0x3e9df156, 0x3e26fc7c, 0x3f34f4f1,
0x3ef6e51e, 0x3f3c3011, 0x3f2f7b3e, 0x3f2f77a8, 0x3c391300, 0x3e98f174,
0x3e8bdefc, 0x3f30c783, 0x3f7e96c6, 0x3ee68bcc, 0x3f34f143, 0x3e829ce8,
0x3f7e51ec, 0x3e599c8c, 0x3ec88726, 0x3f3e3fbc, 0x3f6fd35a, 0x3ec13da6,
0x3ee144a0, 0x3f286fb7, 0x3e6fd0b0, 0x3ed9cc4c, 0x3ea2d060, 0x3e955556,
0x3f2d06cf, 0x3e720e38, 0x3e2fd8ac, 0x3e9d9a9c, 0x3ea6b2ae, 0x3de723e0,
0x3f5a8c80, 0x3f5f3173, 0x3ecc026e, 0x3f114ce9, 0x3e8c8674, 0x3df6e308,
0x3f42c120, 0x3e8db458, 0x3f67cd9e, 0x3f30eaed, 0x3d64dc00, 0x3ea9c648,
0x3e9b1c00, 0x3f2dd324, 0x3e3b5bbc, 0x3f4504a5, 0x3f5c1b0a, 0x3f55056c,
0x3f031a8b, 0x3f295404, 0x3f79fe7e, 0x3f004533, 0x3f02251d, 0x3f552ee3,
0x3e92b6e8, 0x3ebc8276, 0x3f3b09a9, 0x3d832f88, 0x3d883ec8, 0x3f39313c,
0x3f79229e, 0x3f29359d, 0x3f2a06a1, 0x3f72c6d9, 0x3e472458, 0x3f3e730e,
0x3e7e72e0, 0x3f4a247a, 0x3f3d4c9c, 0x3f7548eb, 0x3f06acff, 0x3f38e4a1,
0x3f05bcf8, 0x3f4fec3e, 0x3f6ee7ac, 0x3f4ae502, 0x3f79fa3f, 0x3f74cb35,
0x3f77b600, 0x3f0ebe64, 0x3e1dab58, 0x3f3a0bb6, 0x3f7c8f45, 0x3ec8cb28,
0x3f1b8b9d, 0x3ed94b06, 0x3ecea096, 0x3f54fe0b, 0x3ea5ea86, 0x3f56c7ee,
0x3e8115f6, 0x3f5cb782, 0x3e03f00c, 0x3f070772, 0x3ecff4dc, 0x3ed39aa0,
0x3f1c8d73, 0x3f60bb81, 0x3e7fdbb8, 0x3f09dcec, 0x3e07d3f0, 0x3f645836,
0x3da27348, 0x3ea00164, 0x3f356582, 0x3efeae18, 0x3e8f9a26, 0x3f4f505e,
0x3f29a161, 0x3f12258e, 0x3f69c2b3, 0x3d874650, 0x3f1f6a46, 0x3f00ff2a,
0x3de7d058, 0x3ed437ee, 0x3f2c6f5b, 0x3ed248d6, 0x3e60a4cc, 0x3f3e748e,
0x3f600b31, 0x3f3d84e6, 0x3ec4d5cc, 0x3f4bafee, 0x3e386644, 0x3e318dc8,
0x3eb17c54, 0x3e31e274, 0x3e8983d4, 0x3ef7f19e, 0x3f79fd37, 0x3f46ab95,
0x3ef4fa0a, 0x3e45ff94, 0x3e29b3e8, 0x3bdda000, 0x3e4c0640, 0x3e5cbc1c,
0x3ecfe312, 0x3ece25ca, 0x3f1ed6e3, 0x3f2912cb, 0x3b579900, 0x3e69f2e4,
0x3d2799b0, 0x3f69ddbd, 0x3dc83ab8, 0x3f26740b, 0x3f6cc1fc, 0x3ea15e64,
0x3f6e6eae, 0x3f1a2698, 0x3f669642, 0x3f2ffa0f, 0x3f70c1e2, 0x3f5ee0b9,
0x3f48f8ab, 0x3f2a2697, 0x3ef3d882, 0x3f3147a7, 0x3f3024b6, 0x3e1b1a84,
0x3f5c060f, 0x3f30a1d8, 0x3f0d2dc0, 0x3f3a2052, 0x3edd3952, 0x3f5dbf09,
0x3f73ddbf, 0x3f796f28, 0x3e79afd8, 0x3f4303b6, 0x3e591f44, 0x3f435101,
0x3f7b47a3, 0x3e846f08, 0x3e94f9f0, 0x3f2ab23d, 0x3f113f6c, 0x3f706bc8,
0x3f74e40a, 0x3ef13f1c, 0x3f2a33b6, 0x3e6584e0, 0x3f032998, 0x3eb39b0a,
0x3f364f87, 0x3f339940, 0x3f18226c, 0x3e8a80e0, 0x3e821214, 0x3f06546e,
0x3f268e0e, 0x3f0b5f1b, 0x3f3fe2bf, 0x3f5b0d6c, 0x3f729e72, 0x3d2562c0,
0x3d0ca500, 0x3f68fc82, 0x3f5366e4, 0x3f3568ef, 0x3e87fad4, 0x3f75729b,
0x3f24e8a3, 0x3d6abaa0, 0x3e294464, 0x3ee48360, 0x3cd4a340, 0x3f079096,
0x3f04b48a, 0x3eccbce8, 0x3f74479d, 0x3f5798f2, 0x3f177ffb, 0x3f468703,
0x3ede83d8, 0x3e33576c, 0x3f079f61, 0x3ebbedf4, 0x3ef9986e, 0x3f1ee33b,
0x3f100c86, 0x3f1d754f, 0x3dacc6d0, 0x3ea3ee3e, 0x3f6148f7, 0x3e59ebe0,
0x3eb57436, 0x3f36b9d7, 0x3f48a837, 0x3eb09c80, 0x3f4af664, 0x3e62ef4c,
0x3f780af1, 0x3e594e4c, 0x3e5f4328, 0x3f667cf8, 0x3f01ad7c, 0x3ed7d95e,
0x3ee471de, 0x3f6f8cc3, 0x3f626796, 0x3e2bb88c, 0x3eb539bc, 0x3e9d1e70,
0x3d04af60, 0x3f0dc769, 0x3e8509ea, 0x3f2d3dca, 0x3f0eaf70, 0x3f14c9cb,
0x3f1d0796, 0x3d2e9980, 0x3e91e186, 0x3ef9dea2, 0x3eaf3da4, 0x3ee2b866,
0x3f74d4df, 0x3c97ad20, 0x3f3501e4, 0x3eb685bc, 0x3f683e80, 0x3f3064eb,
0x3e4850c8, 0x3f299c76, 0x3f7a2df3, 0x3d9d8918, 0x3f2e0403, 0x3ee87888,
0x3ee1c118, 0x3f20afd2, 0x3e0a36a0, 0x3f4f002e, 0x3f2004be, 0x3ec0ea64,
0x3ee5e8fe, 0x3e3b0098, 0x3f360ae8, 0x3f2f745a, 0x3de86310, 0x3f172288,
0x3d0bdb90, 0x3f2dc56c, 0x3dd51778, 0x3f4c2f67, 0x3f7d1f1c, 0x3f697864,
0x3f2b0d22, 0x3f53298b, 0x3f69653e, 0x3f65295a, 0x3e8a03fe, 0x3e0ec28c,
0x3e2d7d60, 0x3f5fc080, 0x3ef7bc34, 0x3ddf8a60, 0x3f59eeff, 0x3f353235,
0x3ebd4fe2, 0x3edc9062, 0x3f485eb6, 0x3f4a01a5, 0x3ef23f14, 0x3dd82e78,
0x3eab4254, 0x3eacd2da, 0x3e483ad0, 0x3f17ab53, 0x3f794651, 0x3f5e7411,
0x3f4a7dce, 0x3f2e1c71, 0x3f26887f, 0x3eb9448c, 0x3ec6bc90, 0x3f2d4776,
0x3d30df00, 0x3e621c80, 0x3f674051, 0x3da442d0, 0x3f01afbd, 0x3da56ea8,
0x3f1b26d0, 0x3d3b1de0, 0x3f463b7c, 0x3f5c5a36, 0x3e82972a, 0x3f6cb23b,
0x3f4f466e, 0x3ec2e70a, 0x3f5856e9, 0x3f0e5043, 0x3f2429af, 0x3f4d2142,
0x3f3882b1, 0x3f0f5c7d, 0x3f597aa7, 0x3f50529a, 0x3dab32d8, 0x3f6e5fdb,
0x3e81cd28, 0x3f26a4f2, 0x3ec57b76, 0x3ebc7f04, 0x3f62884e, 0x3f0bea47,
0x3f1a8299, 0x3f640dfd, 0x3db05d48, 0x3efc1a52, 0x3f753998, 0x3dc9acd0,
0x3f2c5310, 0x3edb896c, 0x3f0f220d, 0x3f624244, 0x3e82dea4, 0x3f1cc000,
0x3f6d70f2, 0x3e40a000, 0x3de915d0, 0x3ea79dd6, 0x3f6c3322, 0x3f3a5260,
0x3f2fafe6, 0x3f0220ba, 0x3c442f00, 0x3ead4e94, 0x3f5e0bef, 0x3f31c108,
0x3f0758ae, 0x3f23a01b, 0x3f357fc9, 0x3ec12ce6, 0x3f71ebfa, 0x3f54693f,
0x3e498b00, 0x3f76fe33, 0x3f5c7f35, 0x3e1a0d8c, 0x3be7bb80, 0x3ea85f74,
0x3f3501f1, 0x3f564dea, 0x3f1c1785, 0x3f54121f, 0x3dd2fe98, 0x3eaf108e,
0x3f137b55, 0x3d38b7a0, 0x3d8d7ad8, 0x3f0682c8, 0x3b82b780, 0x3ed94374,
0x3f6a6d68, 0x3f1947be, 0x3f09945e, 0x3f12a126, 0x3f211a5c, 0x3a417000,
0x3f73859c, 0x3f45166e, 0x3f5ebe59, 0x3f4bcb13, 0x3e9c5f96, 0x3f75736f,
0x3f51c1c5, 0x3ea9ee7a, 0x3f10e063, 0x3db5d480, 0x3f160ed2, 0x3f7cc91b,
0x3f10cd60, 0x3f3bf56b, 0x3dd68df8, 0x3d9faa98, 0x3f51ab40, 0x3f0c3d4d,
0x3e460d30, 0x3eed3cbc, 0x3ecc0972, 0x3d93c318, 0x3f2ee9a6, 0x3f085594,
0x3f78f9dc, 0x3e961498, 0x3e1e79c0, 0x3f25770f, 0x3f007cea, 0x3e5cfafc,
0x3dd02650, 0x3e91ff76, 0x3f07e6a2, 0x3eb84a80, 0x3f2cdfef, 0x3e22a594,
0x3eea89f0, 0x3f492e26, 0x3e85078e, 0x3f5bbaef, 0x3eb3905a, 0x3f54cc91,
0x3e93bf94, 0x3f6a45ae, 0x3cc7ade0, 0x3f549dbc, 0x3f0bb524, 0x3f418b70,
0x3f51f4fb, 0x3f70a11f, 0x3f21af9d, 0x3e3d5918, 0x3de43008, 0x3f028458,
0x3f6a153a, 0x3f14d2f6, 0x3ebb065e, 0x3f0f8d0e, 0x3e54b0a0, 0x3f26c7d4,
0x3e283e68, 0x3ef86164, 0x3d2a2ac0, 0x3e24efec, 0x3f5e896c, 0x3edb8888,
0x3f2bb5b7, 0x3f4e5a25, 0x3f7129ea, 0x3f30e0a6, 0x3e42d17c, 0x3f418e0c,
0x3f656084, 0x3f653727, 0x3eb9f338, 0x3f0c8312, 0x3f6ad76b, 0x3c6202c0,
0x3e21bfd0, 0x3eed70d8, 0x3d8ed008, 0x3f173534, 0x3f486671, 0x3d2b60a0,
0x3e89b5f6, 0x3eb618e6, 0x3f2ce2fa, 0x3f1381de, 0x3f1d9160, 0x3e9a7c70,
0x3db77628, 0x3e40f778, 0x3f69f880, 0x3f00cf19, 0x3f23b598, 0x3f08174d,
0x3ea918a8, 0x3f37c571, 0x3e820f62, 0x3d9b0ad8, 0x3f6d08c0, 0x3f7e543a,
0x3e8ce988, 0x3d861c00, 0x3d9b1840, 0x3eedd152, 0x3e485274, 0x3e6e882c,
0x3f595212, 0x3f2d1eb1, 0x3ee28b92, 0x3f393f6c, 0x3e5e0ef8, 0x3e2c3a80,
0x3f0e7d9c, 0x3d9d3400, 0x3f7c9d2b, 0x3e66c4e8, 0x3f1009f7, 0x3e7bf370,
0x3e87f8d0, 0x3f788965, 0x3f7e73a4, 0x3f7c8526, 0x3dfb2360, 0x3f02793c,
0x3e070eec, 0x3cdc3e20, 0x3f32d2dc, 0x3f098e7b, 0x3e4a9388, 0x3f00bbcb,
0x3f17cf5c, 0x3e4ecfdc, 0x3ec449ba, 0x3eb8c960, 0x3ef8abde, 0x3e24469c,
0x3f170541, 0x3edbb978, 0x3f4d6120, 0x3ef277ae, 0x3f4e43f2, 0x3ec9478a,
0x3e6e5808, 0x3ebde8d6, 0x3f082d70, 0x3e87f052, 0x3e29bfb0, 0x3eda91e2,
0x3f202a5d, 0x3ed63c34, 0x3f0fa677, 0x3f0775e8, 0x3ea35558, 0x3dc8b398,
0x3e2002bc, 0x3e697b90, 0x3f19a280, 0x3ec93b6c, 0x3e44b508, 0x3ea13342,
0x3c90ad40, 0x3ed610d2, 0x3e87b876, 0x3eda1da8, 0x3f64ea12, 0x3c29b900,
0x3e4cb500, 0x3e294f00, 0x3e936792, 0x3f73d94b, 0x3da75668, 0x3cd397a0,
0x3d369470, 0x3c97c5e0, 0x3e6d00a8, 0x3f4f6edb, 0x3efa83ea, 0x3f5bad61,
0x3f514571, 0x3ca64680, 0x3eae2800, 0x3ef8ceaa, 0x3f6370c8, 0x3f440ca1,
0x3f7a74c5, 0x3f65d024, 0x3f1767f7, 0x3dd46c08, 0x3f6f9204, 0x3f5e1606,
0x3f6ab19b, 0x3f1f20d7, 0x3f06952e, 0x3d5fb890, 0x3ed6ba26, 0x3f0ad89e,
0x3e740f08, 0x3f7428e9, 0x3f1b0d20, 0x3daba2b0, 0x3e172e24, 0x3a779800,
0x3efbf780, 0x3f0fc439, 0x3f6f80fd, 0x3ed131e4, 0x3ec6e8ba, 0x3f5cb35b,
0x3e615610, 0x3eab48c6, 0x3f0ac3db, 0x3c813180, 0x3f6a6240, 0x3f58f98e,
0x3ea44696, 0x3edb1334, 0x3f47e4a5, 0x3f4de0e1, 0x3edbf24c, 0x3d544610,
0x3f2426aa, 0x3cd486e0, 0x3ed9ccd4, 0x3d640b00, 0x3f620ff1, 0x3e9f4fb4,
0x3f6bc59f, 0x3f74ab39, 0x3e0ab1f0, 0x3f11b2b5, 0x3f65e216, 0x3eb0b7c6,
0x3f2c35dc, 0x3f5321d2, 0x3f3d4365, 0x3f6b1eee, 0x3f38c399, 0x3ec4deae,
0x3f0f7454, 0x3e91c52a, 0x3f7bf797, 0x3f5aa345, 0x3e62886c, 0x3f3a35d4,
0x3ecbf950, 0x3f214f6e, 0x3f4c49e8, 0x3f322e38, 0x3e3caa44, 0x3ea79586,
0x3f360305, 0x3f5223fc, 0x3f085114, 0x3f10d3bb, 0x3e8da57a, 0x3ed097ac,
0x3ede38c8, 0x3e898dfa, 0x3ed10910, 0x3f33ba5c, 0x3d8475d0, 0x3f4bf145,
0x3daf4e18, 0x3e4975f8, 0x3d73ea90, 0x3f6b831d, 0x3d4b8f60, 0x3f39970d,
0x3f0a2217, 0x3e2c9714, 0x3f62858e, 0x3d02c820, 0x3f2f9d59, 0x3eae793e,
0x3f6cc09d, 0x3c3a98c0, 0x3f5c15c0, 0x3f17a57d, 0x3e1401d8, 0x3f2372f3,
0x3f599774, 0x3f7d5836, 0x3f1a7a50, 0x3e487078, 0x3eda58ec, 0x3f5ed2cf,
0x3eea7eba, 0x3f023c54, 0x3eea7fea, 0x3f6f4922, 0x3f1f446b, 0x3e9ed28a,
0x3e38c488, 0x3edde92a, 0x3f3a1bd0, 0x3f41363a, 0x3f469bcb, 0x3eed8668,
0x3e9badae, 0x3f0b67fc, 0x3e42a6cc, 0x3e2b63c4, 0x3f68f9c6, 0x3ba62880,
0x3ef6f17a, 0x3e035950, 0x3f513d0f, 0x3e91a298, 0x3d7e5440, 0x3f439366,
0x3e98604c, 0x3f6137ec, 0x3dc44c58, 0x3f199655, 0x3e90b53c, 0x3f3cf9df,
0x3e8cdb66, 0x3c7f9cc0, 0x3f7e21b0, 0x3ec2df98, 0x3e3c6a20, 0x3f682873,
0x3f492deb, 0x3f63bda8, 0x3e7dae2c, 0x3d8e8280, 0x3f18eb3c, 0x3f798158,
0x3d366370, 0x3e3c25dc, 0x3e79ed0c, 0x3e8a14ae, 0x3d23e8f0, 0x3f442f56,
0x3ee36cc0, 0x3f375a0e, 0x3f0cfd62, 0x3f41deca, 0x3f371e13, 0x3f7df3b9,
0x3f641d47, 0x3eff708a, 0x3eb5ac70, 0x3eeb1ea0, 0x3f2dd090, 0x3f5b1711,
0x3f2c9c50, 0x3c9b5d00, 0x3f21ce76, 0x3f0b3455, 0x3e721780, 0x3debd3d8,
0x3ee69ad0, 0x3f6a3e92, 0x3e44370c, 0x3cd70b20, 0x3f67e35c, 0x3ebec97c,
0x3f5f8319, 0x3f4b96a2, 0x3eaf1298, 0x3e4e204c, 0x3eaf0214, 0x3d2d3440,
0x3e56d6b8, 0x3f217618, 0x3f047754, 0x3f3ce9e2, 0x3f791eba, 0x3f3da46b,
0x3f0cf931, 0x3f3b2bca, 0x3eaebb44, 0x3f2c2f97, 0x3f7a789d, 0x3f498bd1,
0x3f7e30b0, 0x3d0fb0e0, 0x3ebad65a, 0x3e0cd240, 0x3e18fb34, 0x3e99995a,
0x3f031f8e, 0x3f2074ca, 0x3efe0d1e, 0x3e7ba5b0, 0x3f789ea7, 0x3e81216c,
0x3d150440, 0x3f31c10f, 0x3f1c1ff7, 0x3e7aaa80, 0x3f5d13e3, 0x3f7562bf,
0x3e002ef8, 0x3f548e97, 0x3f43d011, 0x3f6cbe41, 0x3e1dd5c4, 0x3f0455e9,
0x3d7a8840, 0x3bffb480, 0x3f4a53cc, 0x3d6d95a0, 0x3dbf4398, 0x3e1f5d18,
0x3e73f26c, 0x3efc6682, 0x3f49593a, 0x3f20560d, 0x3e6684ac, 0x3f1c2fec,
0x3f2928fd, 0x3f62c2be, 0x3e86282e, 0x3f10711e, 0x3f72c1ba, 0x3cb91760,
0x3e891758, 0x3e05b87c, 0x3e32b134, 0x3f5300a3, 0x3f72a7eb, 0x3ce5daa0,
0x3f05dc44, 0x3e389e00, 0x3ea3a2c6, 0x3f323be9, 0x3f41aa86, 0x3e56f618,
0x3c152180, 0x3f74b62c, 0x3d6b5cf0, 0x3d80c960, 0x3f704c6a, 0x3d903960,
0x3ece4754, 0x3f1b0eb6, 0x3dd1c908, 0x3d4aa1f0, 0x3e6c8818, 0x3e7bac78,
0x3f31e5fb, 0x3ecae886, 0x3f3fb539, 0x3f40c8a9, 0x3f21b7b3, 0x3f3f1634,
0x3f35d098, 0x3e653504, 0x3e4b3124, 0x3c0bee00, 0x3e768770, 0x3f4c8996,
0x3f6bad38, 0x3f14dcf6, 0x3ef6198e, 0x3e21a758, 0x3e981052, 0x3ea62be0,
0x3f3b0534, 0x3f11f168, 0x3f487ab5, 0x3e47f804, 0x3f26b6cd, 0x3e791c58,
0x3e00e8bc, 0x3f6899fd, 0x3f6541f4, 0x3d5b9fe0, 0x3e389b90, 0x3f2b13aa,
0x3f4d9730, 0x3eca8a50, 0x3f278121, 0x3f39e2ff, 0x3f3e4aa6, 0x3f400835,
0x3e130ddc, 0x3f35d27b, 0x3ecc9640, 0x3f69018f, 0x3f0082f1, 0x3f452131,
0x3ed5793e, 0x3f419e18, 0x3eb9201e, 0x3e2f867c, 0x3dc4b4a0, 0x3d9042d0,
0x3f420e18, 0x3f2a985f, 0x3f5b1303, 0x3ee870fc, 0x3eaaa0e2, 0x3f465da8,
0x3f67e8d4, 0x3e31306c, 0x3e974a36, 0x3e9aa284, 0x3eae3e36, 0x3e56dfdc,
0x3f1246ef, 0x3f2efa26, 0x3f47c8a5, 0x3ef8259a, 0x3f1383d5, 0x3f68cb4d,
0x3f33b5ed, 0x3ed86a62, 0x3f41b36e, 0x3f115e00, 0x3f3af463, 0x3ee41e10,
0x3f473c66, 0x3e261b4c, 0x3e8d4fb2, 0x3f159f00, 0x3e508c38, 0x3f32392f,
0x3f7e21b6, 0x3ea3b734, 0x3f005931, 0x3d6700a0, 0x3e51df88, 0x3ef213fa,
0x3f5f7884, 0x3ebac918, 0x3f2e4c2f, 0x3f25a310, 0x3e95f6e4, 0x3f384f8c,
0x3ea1734c, 0x3ed012aa, 0x3ddd03c8, 0x3ede75f0, 0x3f441bf2, 0x3e8a57b0,
0x3f281be8, 0x3eec1672, 0x3f432bee, 0x3f2b360b, 0x3f4c1fe8, 0x3e8a23de,
0x3d45dba0, 0x3f65ef97, 0x3f3f60db, 0x3dca8de0, 0x3c366400, 0x3f5fe7ff,
};
// 15,10,6,7
uint32_t kernel_vals[] = {
0x3cdb73c2, 0x3c526030, 0xbc26191a, 0xbcb1ae92, 0xbc7bf83c, 0x3c2f1530,
0xbb4ac0f0, 0x3c97fb2a, 0x3c31873c, 0x3c980e58, 0x3cd67cbe, 0x3c31b9c0,
0xbc834918, 0xbd06e50d, 0x3cb9e752, 0xbc363616, 0x3cb73d78, 0x3c47f97c,
0xbc82b898, 0x3c2760f0, 0x3cea7da8, 0x3c3b0884, 0x3cc7c39c, 0x3ce3a586,
0xbb3c9b58, 0xbcab4bb2, 0xbc0cf1da, 0xbd009685, 0x3b513b20, 0x3d007089,
0x3b707a20, 0x3d01ff61, 0xbaf24ae0, 0x3c9255d2, 0xbba7a480, 0x3bef72b8,
0x3bb80a70, 0x3a3aeac0, 0x3cae4cfe, 0x3b9f1930, 0xbbe18824, 0xbcb4bf92,
0xbd03344e, 0xbc1e3f8a, 0xbce4b6a9, 0x3be7c068, 0xbbe5ddf8, 0x3bd8db80,
0x3cf41fde, 0x3c584bac, 0x3ce409a0, 0xbcbbbaa3, 0xbc93c292, 0xbc35fb5e,
0x3ad80ba0, 0x3c9e5f78, 0x3c9dd550, 0x3cad6cec, 0xbb59bac0, 0x3c4fa608,
0xbcd34638, 0xbc321fce, 0x3ca213be, 0x3cf9969a, 0x3cfd9836, 0xbbef3644,
0xbd06dde2, 0x3c96f69c, 0x3cd102ec, 0xbc7d8c30, 0xbd0836d6, 0x3c0fc210,
0xba84f760, 0xbbdf353c, 0xbb44f878, 0xbb84fa68, 0x3cd2f472, 0xbc5b2b5e,
0x3ce85990, 0x3cf00b22, 0xbc094c40, 0x3b4ef580, 0x3ce62008, 0xbb48f8f0,
0xbc6ffb34, 0x3cdec706, 0xbb674aa8, 0xbc8525c7, 0xbb7547c0, 0xbcff6214,
0xbade41a0, 0x3c3b0a78, 0xbcdd38fe, 0x3c805196, 0x3cb22bbe, 0x3d086e91,
0x3b0b5450, 0x3b3921e0, 0xbcde18f8, 0x3ce8a88e, 0xbc889949, 0xbc40e830,
0x3cf78466, 0xb9423100, 0xbd0224d4, 0x3d0861bf, 0x3cc13370, 0x3d018ce1,
0xbce7a2ee, 0x3c9463c2, 0x3bcf3c78, 0xbb9c0de0, 0x3ca52d6e, 0xbb45ea00,
0xbccccf4c, 0x3c9e357a, 0xbce0f487, 0xbcf80901, 0xbc67b1da, 0x3cd12a12,
0xbd033d54, 0x3c9cba84, 0xbcfb5fc1, 0x3be3fd08, 0xbccba260, 0x3b1da040,
0xbcc7f0c5, 0x3c928254, 0x3cab85ee, 0x3c954e84, 0x3cc7f242, 0xbcb3b7b6,
0x3c25e534, 0x3ccf0e5a, 0xbc91e9c9, 0xbc73e8ca, 0x3c9e0676, 0x3cc00de2,
0x3c098bb0, 0x3c8a0e6e, 0xbb8acaac, 0x3cdfa214, 0xbc354dd6, 0xbc6ba928,
0x3cdf1b8a, 0x3caf2c38, 0x3c249960, 0x3ce113fa, 0x3c612318, 0xbb61fb20,
0xbccfe7f0, 0xbbd52a78, 0xbbb25e10, 0xbc956ee5, 0xbca6dd38, 0xbca1c894,
0xbcf8c96e, 0x3cfaba02, 0xbcda72e9, 0x3b746490, 0xbc54cdce, 0x3d027979,
0xbccb1618, 0xbce43af6, 0xbc5df28a, 0xbb6369f0, 0xbba6cd08, 0x3ca5150c,
0xbcc54076, 0x3cae99b6, 0x3cf34a76, 0x3cc7246e, 0xbaecf3a0, 0x3cf48276,
0x3c70c220, 0x3b89cd00, 0xba265080, 0x3cc80524, 0xbcd12c7a, 0x3c3b7b84,
0x3cab054c, 0xbce9b3d0, 0xbcdf41a3, 0xbbedc0b4, 0xbcdc5af2, 0x3cd5c4a0,
0xbbce6bb4, 0x3cc4958e, 0x3be1da90, 0x3bfacb08, 0xbc99371a, 0xbca37ee5,
0xbb0455f0, 0xbc363b68, 0xbc92b249, 0xbc551568, 0xbc37cc92, 0xbc72ba9a,
0xbcd2b25a, 0x3bd20c48, 0x3c80ff92, 0x3bdfffd0, 0x38c8a600, 0x3c070c94,
0xbcdd1e81, 0xbaf0de40, 0x3cec0380, 0x3cd32e18, 0x3cd5deda, 0xbbdc0634,
0xbc9f18a5, 0x3cd5b556, 0xba5fe580, 0xbbc1fa9c, 0xbc2c6ac4, 0xbb775968,
0xbc9bee18, 0x3cf68936, 0xbc29e44a, 0xbc05fd9a, 0x3ca907c0, 0x3c823938,
0xbbbbe134, 0x379fd800, 0x3ce5dd14, 0x3c3eff30, 0x3c86194c, 0x3cc253e2,
0xbd067b50, 0xbc9feee5, 0xbb145248, 0x3d06162f, 0xbc848272, 0xbbf61da4,
0x3cd025f6, 0xbca65985, 0x3c935de4, 0xbc761240, 0xbce2224c, 0xbcb8c0e7,
0xbcca2f36, 0xbcc5dfce, 0x3c06b534, 0x3c631320, 0x3c9920ce, 0xbc37217c,
0xbceb8db0, 0xbc738646, 0xbcde5e2e, 0xbcd6a0b4, 0x3cc074b6, 0xbc8e59cd,
0x3c31f2b0, 0x3b113410, 0xbca06300, 0x3ca48f16, 0x3ca885fe, 0xbc827b12,
0xbc448e00, 0xbba60d44, 0xbcbce2c0, 0x3c71c9e4, 0x3d039d41, 0x3cb6e8d4,
0x3c680c30, 0xbcdb4bf6, 0x3b6a1710, 0xbbab9d34, 0x3cff1eae, 0xbc8b0ba7,
0x3ca95a46, 0x3bea8810, 0x3cda3dd8, 0xbbd334a4, 0xbbc88d80, 0xbcfe6a7d,
0x3c0d0b30, 0x3ca7866a, 0xbc298abc, 0x3cf81c36, 0x3c20ea54, 0x3ba54a70,
0xbce0ad30, 0xbacf5e80, 0x3b4bff10, 0xbc6f5fe2, 0x3cfbeeb6, 0xbc14f49e,
0x3b2fc1f0, 0xbac974a0, 0xbc43e8a8, 0xbb0e4de0, 0xbd04e6ea, 0xbd042bef,
0x3cd31c08, 0xbaa9caa0, 0x3c1df88c, 0x3ce01630, 0x3bd3ca30, 0xbcc6d7b2,
0xb979e500, 0x3c91ef76, 0xbc676540, 0x3c100668, 0xbbffb008, 0xbba2f1e0,
0xbd041ee4, 0x3c8123ee, 0xbcbbd0b6, 0x3c26b13c, 0xbb88d7cc, 0x3c4248bc,
0x3c332370, 0x3c8c954e, 0xbcc31360, 0xba90d1c0, 0x3c984162, 0x3ca679be,
0xbbb18d88, 0x3d062691, 0x3b859268, 0xbb056a10, 0xbc6d34bc, 0x3d017cd3,
0x3c800880, 0xbc9f02c7, 0x3ce1f8ce, 0xbca4cb8b, 0x3c7fb720, 0x3cce7622,
0x3c96871a, 0x3b472a20, 0x3c68f488, 0x3bc68a60, 0xbcd24547, 0xbab6da00,
0x3b0f53d0, 0xbbd7a9bc, 0x3ccd74f4, 0x3cbe3b2c, 0x3c600240, 0xb8fcb800,
0x3ce7968e, 0x3cd7a0f4, 0xbcf17852, 0xbc392c00, 0x3cebb946, 0x3bc06630,
0xbcc378fe, 0xbcacecc7, 0x3cee1276, 0x3bbfc718, 0xbc9f908e, 0xbc686c00,
0x3caee9a0, 0x3c18c964, 0x3cff88ee, 0xbcbb91a7, 0xbb34c848, 0x3c819278,
0x3ca7ced8, 0xbc109352, 0x3cf20012, 0x3cdb2506, 0x3b1f4d50, 0xbc36af4a,
0xbcd04c8c, 0xbcde58ee, 0xbd087c84, 0xbc0a0b8e, 0xbc758822, 0xbbd7ae58,
0xbcebff76, 0x3b862a48, 0xbbfc8180, 0xbc707e16, 0x3c86bc88, 0x3ce3ea2a,
0x3b38ab00, 0x3cfe37a6, 0xbca4a3d2, 0x3ca1c802, 0xbc810747, 0xbcceb178,
0xbb2c6dc0, 0x3ad1d640, 0x3cf32c3e, 0x3b69acc0, 0x3d018aff, 0xbc29adb4,
0xbc2bacc0, 0xbc985f25, 0x3cb9b5ce, 0xbce45252, 0x3d079ecd, 0xbd00b00b,
0x3bd9a998, 0xbcfacd52, 0x3cd7b432, 0x3c632aa4, 0xbb7c1c20, 0x3bc66918,
0x3c72de5c, 0x3c7f6f48, 0x3c23d93c, 0xbcbf6e94, 0xbca65718, 0xbc62ad46,
0x3c82db7a, 0x3cc31a50, 0x3cc49d1c, 0x3c94d1a2, 0x3b15b110, 0x3c844dd6,
0xbba9761c, 0x3ca76c5e, 0x3d00a875, 0xbb1ea078, 0xbc56518a, 0xbcfd8b9f,
0xbb665798, 0x3cbbba3c, 0x3bf89fe8, 0xbcfed0d0, 0xbc633e6c, 0xbc3980a2,
0x3d06fdfd, 0x3cceb832, 0x3c994c32, 0x3c829982, 0x3c092dbc, 0xbc2b6c44,
0x3cc4d2ce, 0xbcb16cc0, 0xbbeec568, 0x3cd72d26, 0xbc503d92, 0xbabf45a0,
0x3c66d204, 0x3cf23036, 0x3c10ae80, 0x3c5560c0, 0x3c0b9c24, 0xbcef18d6,
0xbcbbede3, 0x3ca1fd72, 0xbb8bb6c4, 0x3c840274, 0x3c8cd982, 0x3bf27fc8,
0x3cb5b230, 0xbc4e3f0a, 0x3c859f14, 0x3ccb5b14, 0x3cb55d6c, 0xbcc16dbc,
0x3c0ab0f0, 0x3cbc3fd8, 0xbc261bde, 0xbd03a753, 0x3cc23830, 0xbbdbae4c,
0xbd03dd28, 0xbcd040c3, 0x3c985ef0, 0xbcdff2a1, 0x3c85f30e, 0x3ccf4274,
0x3cf8077e, 0x3cada580, 0xbce5017a, 0x3cae75ae, 0x3a3cda80, 0x3aaaa060,
0xbbfe0290, 0xbcacd3a9, 0x3c89fadc, 0xbc980c65, 0xbbd7c1bc, 0xbce83f0e,
0xbb0a9000, 0x3c1e56dc, 0x3ce86290, 0x3c5d3720, 0x3c4cabec, 0x3c2ffcc4,
0x3bf320e0, 0x3c3ca4e4, 0x3ce2190a, 0xbcf6bf45, 0xbcc675a9, 0xbcff935a,
0xb90d3700, 0x3cdb6000, 0xbcd76f67, 0xbceba5da, 0x3cd9d4ca, 0xbbc4663c,
0x3c1b1280, 0x3ae7d280, 0x3cc33eec, 0x37476000, 0x3c1e3270, 0x3cd543f8,
0x3d0788f1, 0x3c18f048, 0xbb9aa8e8, 0x3cdab5c2, 0x3c9daad8, 0x3d052e2f,
0x3c8ada02, 0x3c4b6438, 0xbcb6def6, 0x3bf10dd8, 0x3ce2a0f4, 0xbcb73638,
0xbc9d7076, 0xbb67a998, 0xb9fda680, 0xbb4a26e0, 0xbcb7e2fa, 0xbc80fffc,
0x3d06ab9f, 0xbcbd2ea3, 0x3bcc1dd8, 0x3cc49b38, 0xbbcf2934, 0x3c9b12de,
0x3b131c10, 0xbcef8ed8, 0x3ce385b8, 0xbc33e434, 0xbce2f021, 0xbc39a216,
0x3c016178, 0xbc9ebf52, 0x3bea4878, 0xbb81a378, 0x3cc40aaa, 0x3c7a4b30,
0x3c6a541c, 0xbc4d1b96, 0x3c935182, 0x3ab8a8c0, 0x3cc4d618, 0x3cf00b96,
0x3c58767c, 0xbc15fc30, 0xbcdf6170, 0x3c4d03dc, 0x3c2cbd74, 0x3ce0de8e,
0xbcff0603, 0xbc4babb4, 0x3cf1d252, 0x3c8186f8, 0x3cbcc842, 0x3cf2abea,
0x3cdbdcf6, 0x3c081b9c, 0xbb1efe88, 0xbca0ef98, 0x3cddd9fe, 0xbb239d20,
0xbbc9f978, 0x3ca4763a, 0xbc6dddd2, 0x3cec2e36, 0xbc9d0eba, 0x3cea7d12,
0x3c5e268c, 0x3cb285ca, 0xbbca0ebc, 0xbc5ef63c, 0xbd00b5aa, 0xbcbdfcf6,
0x3caa6e92, 0x3c62437c, 0x3c240b8c, 0x3c5fd878, 0x3c383ff8, 0x3cc0b35e,
0xbbfa7960, 0xbc87ecd8, 0xbcf85c98, 0xbbbd76a4, 0xbc7274d2, 0xbcb3d669,
0xbce9ca70, 0x3ce726ae, 0xbc8eb800, 0xbcd6179a, 0x3c201370, 0xbc0ac07c,
0xbc29858e, 0xbccc8e76, 0x3ce08baa, 0x3c2681b0, 0x3d01ec3b, 0xbcb64a34,
0xbbc730f8, 0x3cbda8b2, 0x3c675478, 0xbc39a268, 0x3bb67608, 0xbcaea76c,
0x3c7ec200, 0xbb0391f0, 0xbcb73d32, 0xbd01021b, 0x3cf52186, 0xbc66134a,
0x3b621ff0, 0xbbd6c370, 0x3bbd1c30, 0xbcfe6af4, 0x3cedab66, 0xbcc48a90,
0xbcf79fc5, 0xbb130f30, 0xbce03b0c, 0x3c8403b6, 0xbb8690f0, 0xbaf30d80,
0xbc1b3292, 0x3c8d792a, 0x3d037777, 0xbd055679, 0x3b9a3a08, 0xbcedfd94,
0x3bfdc8d8, 0xbc9d6c6b, 0xbd05151d, 0x3cea4bce, 0x3980a900, 0xbcf3e49a,
0xbce8b7b6, 0x3c584198, 0x3bf91a08, 0x3cd96e1a, 0xbcaf1b90, 0xbcaf24fc,
0x3c3762ac, 0x3d06627b, 0xbb3aea88, 0x3cd10bfa, 0xbd07ca4c, 0x3c9ecb7a,
0xbd04a7ca, 0xbce6d4e1, 0x3cff3e62, 0xbc1ac1e2, 0xbc9354e7, 0x3c5bc734,
0xbc22977c, 0xbd00844e, 0xbce5b0e7, 0xbccb86ba, 0x3c8098de, 0xbc24e4d6,
0x3cebde02, 0xbb733238, 0x3c779648, 0x3cf6bfba, 0x3cbad862, 0x3b7b7600,
0xbca75200, 0x3c45bc48, 0xbcab4989, 0x3c700fa4, 0xbcc7729e, 0x3d077b59,
0x3cbbcda4, 0xbcd4a4c3, 0xba5fdbc0, 0x3b123de0, 0xbcddc521, 0x3c8ae7e0,
0xbd0109a5, 0x3ca10d04, 0xbc64955e, 0xbcbb8752, 0x3c3a4384, 0xbb97972c,
0x3c510624, 0xbc1a2612, 0xbc5e2b52, 0x3cc81ec2, 0x3d00660b, 0xbc219604,
0xbb9cf3bc, 0x3bbd5930, 0xbbc07908, 0xbcda499a, 0x3cf15722, 0x3c7f94dc,
0xbc785ada, 0xbc66224e, 0x3cfbf0da, 0xbcb7833c, 0xbcbf6432, 0x3cd409f6,
0x3d06735d, 0x3c9db84e, 0xbc381bca, 0x3ca26d34, 0x3c371554, 0xbc192fa6,
0xbcfc9a3d, 0x3ce755b6, 0x3ca6a260, 0xbc5f1ad2, 0xbcea7852, 0xbc82f598,
0x3ca71ffc, 0x3bf22490, 0x3cc5f3da, 0xbcf65cda, 0x3ba4c700, 0xbc26825e,
0xbab1c220, 0xbcf4e716, 0x3c67d8d0, 0xbc6b8b38, 0x3ca6af32, 0x3cb24316,
0x3cff1a36, 0x3bf3a160, 0x3c221cb4, 0x3ce98b3c, 0x3ca64ea0, 0xbcd5567e,
0x3b675320, 0xbce106ff, 0x3be95578, 0x3ccf83fc, 0xbcf785a5, 0xbb8a2578,
0x3cd41e8c, 0xbbd38060, 0xbcec5e89, 0xbbbe17d4, 0xbc06ae16, 0x3c6859a4,
0x3d024923, 0xbcf7403d, 0xbc723b9a, 0xbc6225a2, 0xbcec07df, 0xbc7dafe8,
0xb9adad00, 0xbce63365, 0xbcac3d65, 0x3d015505, 0xbc38a204, 0x3c15a10c,
0x3cf4a96e, 0xbcbaf058, 0xbb28b930, 0xbc5b50f8, 0x3c001f00, 0xbcc1f280,
0x3d03cd45, 0xba18dc40, 0x3c26efa8, 0xb8944600, 0xbcb1c058, 0xbc871178,
0x3bd5bf48, 0xbc686996, 0xbc35c234, 0xbced4fb4, 0x3c47efc0, 0x3bb829e8,
0x3cdf2256, 0xbca60f07, 0xbbe60188, 0xba88d7a0, 0xbcd30e89, 0x3b44a3d0,
0x3c2d328c, 0x3cb49e70, 0x3cb94a8c, 0xbc34637c, 0x3cd14968, 0x3c17e430,
0xbc55323c, 0x3ce435b4, 0xbb14a148, 0x3cd41972, 0x3d05b025, 0x3d0229b3,
0x3c288b84, 0xbcd0770c, 0xbc49c738, 0x3d015ea5, 0x3cfd81da, 0xbd07777b,
0x3cef3a1e, 0x3cee03e8, 0x3cd07716, 0xbce633ff, 0x3c993754, 0x3c0829dc,
0x3cc072ca, 0x37071000, 0xbc49d01e, 0x3ca89022, 0x3bb9ce68, 0xbb138a00,
0xbb5d23c0, 0xbd027647, 0x3c9f8134, 0x3ce12816, 0x3d0704cd, 0x3c067b2c,
0x3c938550, 0xbc9bc0a0, 0x3ca180ae, 0xbcf2cbc7, 0xbc22144a, 0x3cac96a4,
0x3cafc46c, 0xbd04b8a2, 0xbca675b8, 0xbcfbf845, 0x3c276504, 0x3ce398d0,
0x3ca7a224, 0xbbdff44c, 0x3c4d8db0, 0x3b8c1510, 0xbb1fe988, 0xbc67048a,
0x3b700fd0, 0xbca368ab, 0x3c5e92ec, 0x3cebcf46, 0x3cc91a6e, 0x3c7a446c,
0x3cde3ca6, 0x3cfe02e6, 0xbcdab10e, 0xbc3f8000, 0x3b91f660, 0xbbc1d508,
0x3bdcfc30, 0xbbe94e80, 0x3c0086f8, 0xbd0611d8, 0xbc9450a9, 0x3c98d27e,
0x3ca10f0e, 0xbc0b78bc, 0x3cf39c8a, 0xbcaf7d56, 0x3be738d0, 0xbc11fcda,
0x3d04a341, 0xbc070e5e, 0x3b02dbe0, 0x3c67cd80, 0x3ca63e0a, 0x3b355fc0,
0x3cd4b738, 0x3c0d9bbc, 0x3d06a273, 0x3bb0efd8, 0x3cf7bfae, 0x3c9f7218,
0xbc48e062, 0x3c76a55c, 0x3c907bce, 0x3cc87c44, 0x3d029e47, 0xbc3f6b16,
0x3cddab66, 0x3c710c24, 0xbbc3fde0, 0x3cdd1a02, 0x3cc357b6, 0xb977d900,
0xbbe3b89c, 0xbcbec37e, 0x3d028bbf, 0xbb4e3c20, 0x3d076067, 0x3cffd22e,
0xbcb0b545, 0x3ca03178, 0x3cd86462, 0x3c729b88, 0xbcc4cd5e, 0xbceee7d0,
0xbb7d0d48, 0x3cd2f1dc, 0xbc86cd2b, 0xbb1e1868, 0xbc840023, 0xbcce608c,
0x3b8a11d8, 0x3a095040, 0xbb8e70f8, 0xbca76038, 0x3c69b384, 0xbc2140b0,
0x3c5f0e00, 0xbcd8b59e, 0xbc243cda, 0x3c6cac28, 0xbba19788, 0xb9ce1380,
0x3ba9e828, 0xbca20ed2, 0xbd03ba43, 0xbcd568d4, 0x3ca1d658, 0x3cc785cc,
0x3d02cf0d, 0x3cd45810, 0xbcb7acde, 0x3cd56f94, 0x3ccf021c, 0xbbcf0df8,
0x3c13d4c8, 0xbbb87490, 0xbc182be6, 0xbb3aa1f0, 0xbb78c010, 0x3bf0c898,
0xbcc94d5a, 0x3cea3908, 0x3cdc9df4, 0x3d0298ff, 0x3d062153, 0x3cfdc31e,
0x3cc8462e, 0x3c4160a8, 0x3c154fc8, 0x3cd95e3c, 0xbc54367c, 0x3c730b1c,
0xbc334584, 0xbb377010, 0xbc869a6d, 0xba21e1c0, 0x3b61d0d0, 0x3c523b9c,
0xbcd9e6da, 0x3cc05fe8, 0xbcaf60e9, 0x3bbc00e8, 0xbb52ee48, 0x3c074ce4,
0x3ce45c50, 0xbc8e6eab, 0x3aa5b0a0, 0xbbdeda60, 0xbb09b6f0, 0x3ce01e20,
0x3c18bea4, 0xbb3013f0, 0x3cbcca08, 0x399cfd00, 0xbd00b20b, 0xbc87ddfa,
0x3c3e5648, 0xbb8b2578, 0xbcb8e974, 0xbb98cab4, 0x3ca893ca, 0x3c60ff50,
0xbc793762, 0xbc3eebd6, 0xbce48194, 0x3cb074f8, 0xbcdd5bfa, 0x3c93372a,
0x3d04f033, 0xb96f5b00, 0x3bc81fd8, 0x3b93c8a0, 0x3c8c7340, 0xbc384022,
0x3ae723e0, 0x3beb9540, 0x3cf7406a, 0x3c91a4e6, 0x3b8861c0, 0x3b126200,
0x3b825998, 0xbcbc6bf6, 0xbc83fe1e, 0x3ccf744a, 0x3ccb8cb4, 0xbb96ba08,
0x3be4d728, 0xba4a9dc0, 0xbcfe37f6, 0xbcf9004c, 0xbce94c07, 0xbc3b98bc,
0xbc81edcb, 0xbce48a6e, 0x3c208ff0, 0x3c76d3ac, 0x3cbe305e, 0x3c6d8560,
0xbce826ee, 0xbcfd777f, 0x3cda06ca, 0xbb5a4498, 0xbcf19732, 0x3c193cc0,
0x3c5eb010, 0x3c5d80d8, 0x3c488e00, 0x3cf148d2, 0xbcf9ee85, 0x3c38741c,
0xbbcf39c4, 0xbce251df, 0x3c876fe4, 0x3c44f844, 0xbc978e98, 0xbc6054a2,
0xba43f440, 0x3c64ab2c, 0x3d0473ad, 0x3c97e1bc, 0xba3396c0, 0xbc370752,
0x3ca3df36, 0xbcc8f15e, 0xbd051a4e, 0xbb3d8468, 0xbab439c0, 0x3c8583d8,
0x3c47b868, 0xbc02b54e, 0x3c026d14, 0x3c8e1818, 0xbd02c07b, 0xbc4a8efc,
0x3c85949c, 0x3badd498, 0x3a1f3040, 0xbc20b43c, 0x3cd6d7de, 0xbc5a4480,
0x3c358af8, 0xbccec1da, 0xbbea6f68, 0x3cde5058, 0xbba4149c, 0x3c338fc8,
0xba8371c0, 0x3ccb02ec, 0xbb051390, 0xbc4385bc, 0x3ce366ac, 0x37e63000,
0xbc98d389, 0xbc88ae3a, 0x3a609d40, 0x3bf81c88, 0x3cb28efc, 0x3cdcec78,
0x3c232060, 0x3cac4102, 0x3c90d24c, 0x3ca9b482, 0xbcd228da, 0xbb5d5f20,
0xbc1a980e, 0xbc3a6640, 0xbc8951ab, 0x3cd96a1c, 0xbce888bc, 0x3bf4ebe8,
0xbc1164c0, 0xbc5dfe38, 0xbbd0bbc4, 0x3c0d117c, 0xbc82f2fa, 0xbcc73cbc,
0x3c6a1c30, 0xbceadbf2, 0x3ce19188, 0xba129880, 0xbc0a8d4e, 0xbc72873c,
0xbca63de2, 0x3cfe3336, 0xbb598810, 0x3cbe8a0c, 0xbcb0355e, 0xbd032fb5,
0xbcbba729, 0xbb2c3d78, 0x3c6f677c, 0x3b687420, 0x3ca18b34, 0x3ca06c76,
0xbc0c9fac, 0x3c106838, 0x3a9f6720, 0xbce0f62c, 0xbc9897b2, 0xbc99cdd8,
0x3bce3950, 0xbc5f8006, 0xbace8260, 0xba1e0b00, 0xbc276174, 0x3c9eda36,
0x3cd0f708, 0xbc3d0ef4, 0xbcaba8c0, 0xbb0d3ad0, 0xbbf87a70, 0x3c5a1e40,
0xbc819ea3, 0x3c93c35e, 0xbb8936e0, 0x3cc0ec64, 0x3c638138, 0x3c85e4bc,
0xbbb3bf80, 0x3cae71ec, 0x3c5457a8, 0xbcc25292, 0xbce3c265, 0xbcac987a,
0x3d0473f7, 0x3cc8f57e, 0x3c7c3fe0, 0xbcfa8a47, 0x38283400, 0xbb2c6a78,
0x3c84591c, 0x3c92d74c, 0xbc83c58b, 0x3ceba726, 0x3bf58360, 0xbc1a0ab4,
0x3cf38b7a, 0x3d00757d, 0x3c0a1d50, 0xbc7236e8, 0x3cdf5ef6, 0x3c9b80b4,
0xbc756d7c, 0x3d07b20b, 0x3cb913e2, 0xbb54d920, 0x3bf79af8, 0x3d06fc93,
0xbc80b485, 0xbc0fda62, 0xbc14574a, 0xbb2009f0, 0x3cca5496, 0xbcff7647,
0x3cdec87a, 0xbb6e7f78, 0xbce2a198, 0xbb8189e8, 0x3b288340, 0xbbd2e944,
0xbce2ca74, 0xbcccf7d0, 0x3cf8fb9e, 0xbb875f78, 0xbc15be56, 0x3cdefde0,
0x3cba7f7e, 0xbc4207de, 0xbb8099cc, 0x3bff01f0, 0xbcf34298, 0x3c2e1740,
0xbce95f9f, 0x3d04936f, 0xbc4fb03c, 0xbca6a58e, 0x3cc579c4, 0x3b3af630,
0xbcf3e7d6, 0xbadcffe0, 0xbce1ff9c, 0xbc29049e, 0xbbd78180, 0x39b82e80,
0x3c6c76c8, 0xbc22dc9a, 0x3ceb6498, 0xbac9cf40, 0xbc7c4870, 0xbc102e96,
0xbcfee66c, 0xbce39c49, 0xbbec429c, 0xbafc4ca0, 0x3c624a9c, 0xbb940e9c,
0xbcf671e1, 0xbc6023fc, 0x3c96c674, 0x3ccdb124, 0x3b560c10, 0xbc6148bc,
0xbcf1117a, 0x3b9eace0, 0x371f5000, 0x3b1702f0, 0x3cd7ee40, 0x3a6fb5c0,
0x3cf50782, 0x3c4fcb20, 0xbad398c0, 0xbc8bf7f6, 0xbc80ac9c, 0xbcace4a5,
0x3b9b7e00, 0xbb1f1148, 0x3cd1152c, 0xbbdc57c4, 0x3a843cc0, 0x3ccb4a14,
0x3c122e44, 0xbc9ec47a, 0x3ba24088, 0xba458680, 0x3ccedc74, 0xbbec7bc4,
0x3b6b6c20, 0xbcfb1801, 0xbc22e970, 0xbd068d96, 0x3d0217df, 0x3cd3ee0e,
0x3d080413, 0xbc91acae, 0x3c88e316, 0xbc5b5dda, 0xbbde2a88, 0x3c9130c4,
0x3bbf0378, 0xbb799600, 0xbca85385, 0x3cb98a2a, 0xbcde983a, 0xbce53336,
0x3cdfb316, 0x39c27d00, 0xbce8d374, 0xbbae52b4, 0xbc894a47, 0xbbad4d70,
0x3ce7d2d0, 0xbbee31c4, 0x3c80c000, 0xbbce64f0, 0x3cd7d3ee, 0x3ce571fa,
0xbc1c3284, 0xbcfea7b2, 0x3cb238b0, 0xbc723b5a, 0xbc2e711e, 0xbc984a05,
0x3c2bff90, 0x3c76a230, 0xbd06b800, 0xb981e100, 0xbc98b220, 0xbcaa8182,
0xbcc1c9fc, 0xbcab7792, 0xbccefab2, 0xbc421916, 0xbbba61b4, 0x3d03c6eb,
0x3cb024e4, 0xbbe25c1c, 0xbc62eb0e, 0xbc524868, 0xbd05b88a, 0x3c4c6d64,
0xbb533bf0, 0x3cf49a4a, 0xbb8d6af0, 0x3d034e95, 0x3c9656d8, 0xbc2337f4,
0xbcbffbe3, 0x3c8977c6, 0x3c9f03f2, 0xbcfc0aee, 0xbc73092c, 0x3bd739f0,
0x3cf7bd1e, 0x3baa3f00, 0x3d01db81, 0xbc56289e, 0x3c5f56ec, 0x3b6713b0,
0x3d05d4dd, 0xbcf150b2, 0x3c28d864, 0x3cce871e, 0xbba182d4, 0x3b127200,
0x3c8c9dee, 0x3b60fa40, 0xbb7fdac0, 0xbcae9f89, 0x3c1feb88, 0x3d041bc1,
0x3a7af540, 0xbacc52c0, 0xbcf828d4, 0x3ccfce22, 0xbbffeb1c, 0x3ca072e4,
0x3cdfe5fe, 0x3c8846d4, 0xbc409d16, 0x3c03b980, 0x3cc896ae, 0x3c082068,
0xbc19d440, 0xbcdfe130, 0xbcf0ecb6, 0xbc8e4a14, 0xbc07444e, 0x3c301d40,
0x3cf9087a, 0xbcbd2bd8, 0xbcd98945, 0x3bccadf8, 0xbc9ec78b, 0x3cb8d058,
0xbd05ff5b, 0xbb119f20, 0xbc7365f0, 0xbc8a07b8, 0x3ccf3b88, 0x3b79d640,
0xbcfd2ece, 0x3ce106ce, 0x3ce81996, 0x3be782e0, 0x3bcc7580, 0x3cc2b156,
0xbcd8c070, 0x3c7ce6bc, 0x3c9e0cc0, 0x3cb48f3e, 0x3cc262da, 0x3b55d8e0,
0xbcc15565, 0x39946780, 0xbc64f422, 0x3c8bbab2, 0xbc953aa7, 0x3c863780,
0x3ca7e1ea, 0x3beac828, 0x3be755b0, 0xbc956369, 0xbbed2c78, 0xbc5e180a,
0xbd0560a3, 0xbbb5c768, 0x3cc2d328, 0x3bf84868, 0x3c8f50d8, 0xbce6e03f,
0xbce40316, 0x3c751da0, 0x3c456c98, 0xbb66bbd0, 0xbc88117a, 0xbce4e538,
0xbce70252, 0x3c3fe14c, 0x3aa19bc0, 0xbc937442, 0x3c624c84, 0x3c3a0238,
0x3ce65722, 0xbcb0e9a0, 0xbcc2ec34, 0xbcf4b6f2, 0xbd028280, 0x3b41ed50,
0x3caba662, 0xbb82a71c, 0xbc86a9c0, 0x3b31ed80, 0x3bfe5ce0, 0x3b9bd280,
0xba35e340, 0x3d021d1f, 0xbcb5b66c, 0xbcf9d47f, 0x3c8b17d8, 0xbce67927,
0xbce8378c, 0x3ccc25aa, 0x3c04994c, 0x3c3ca310, 0xbbe559d8, 0x3cb0d90c,
0xbca26047, 0xbbaadb78, 0x3bd805a0, 0x3c212190, 0xbcde54e7, 0x3c820ef2,
0xbc75bc2c, 0x3a727900, 0xba00d540, 0xbc5fb1e8, 0xbc155d04, 0x3cb2bab0,
0xbce88ee1, 0xbc9302fc, 0xbc1abb04, 0xbd070c5f, 0x3ca67796, 0xbc4e2a5a,
0xbbcc1c1c, 0x3cfb92f2, 0x3cf43336, 0x3cc967dc, 0xbcf7c134, 0xbca300fe,
0x3c0479c4, 0x3c9e093e, 0xbb933278, 0x3c592e64, 0xbb8d7570, 0xbcd99bda,
0x3c158144, 0x3c8a6aec, 0xbc3b4e44, 0x3c0839d4, 0x3d03d73d, 0xbc7c628e,
0x3c27d26c, 0xbc6b8fbc, 0xbcc1136e, 0xbcda85f4, 0x3cf80d3e, 0xbbe9d390,
0xbc9bdc00, 0x3c84f3ee, 0x3b4c5510, 0x3cc32202, 0x3a160580, 0xbccf5010,
0xbb3e7ef0, 0x3b8573e0, 0xbafc4e20, 0xbc5ef50a, 0x3b68db00, 0x3c7a38e0,
0x3cd94edc, 0x3cebb86c, 0x3b009760, 0xbc508ec0, 0xbc93afa9, 0x3ca4c752,
0xbccad740, 0x3bd251d8, 0xbd037efd, 0x3ce126ac, 0xbc444f00, 0x3bc8fef0,
0xbc8773ab, 0x3c8f8a44, 0x3cdfea4a, 0x3c186c6c, 0xbca5cb49, 0x3cc193ca,
0xbd0874ce, 0x3ca9d586, 0xbbe956ac, 0x3cfae08e, 0x3cf17372, 0xbc614856,
0x3cfbe4da, 0xbbe03c70, 0xbcf0db56, 0xbb853690, 0x3c94ce26, 0xbc943114,
0x3ba49e78, 0x3ce11a52, 0x3ccbe3d6, 0x3bd42b00, 0x3c8a4e0c, 0x3d03168f,
0x3cfab03e, 0xbcb93b54, 0xbc72f3ca, 0xbc61ea52, 0x3c9b9678, 0x3c933c80,
0xbc8fd13e, 0xbbbd9d54, 0x3ca6ba12, 0xbbd3a29c, 0x3c46c248, 0x3c7bf530,
0xbc187684, 0xbcd3ff70, 0x3c850b4e, 0xbae7abe0, 0x3b77f920, 0xbbe86e34,
0xbced401c, 0xbcb71340, 0x3c4243c8, 0xbd028d65, 0xbc7ee786, 0xb9b09b00,
0x3cbf051e, 0x3babd340, 0xbccaa152, 0xbcd92201, 0xbcf7c16e, 0x3d04eb79,
0xbc9ae42e, 0x3c95eee0, 0x3cedea06, 0xbb9dfe00, 0x3c28e33c, 0xbcc3013c,
0x3c5503e0, 0xbcab76ba, 0x3b85bac0, 0xbcf9e0f0, 0x3c21e944, 0xbcbbc8c9,
0x3bb96428, 0x3d00fed1, 0xbca0ed4e, 0x3ca3f8d0, 0x3a4dfc80, 0xbb8b93bc,
0x3cb8b96e, 0x3d068f0b, 0x3cc4b868, 0xbcfb1207, 0xbd011463, 0x3c7661bc,
0xbba73b70, 0x3d051491, 0x3c8f4ffa, 0x3cfb6062, 0x3cb4c542, 0xbcea0814,
0x3b63ddf0, 0x3bf17388, 0x39867580, 0x3b977398, 0x3c531c4c, 0x3c7378b4,
0xbc77a492, 0x3bfec698, 0x3cab60ec, 0xbb5ae458, 0xbb3e9aa8, 0x3bad48f0,
0x3bb83bd0, 0xbc3d94f0, 0xbba027f8, 0xbc747aca, 0x3c38bea8, 0x3999c800,
0xbcf83516, 0xbbca5a60, 0x3cbf2568, 0x3ce82c4a, 0xbd07cf83, 0x3cf883fa,
0xbcb019fc, 0xbbc520e0, 0xbcec7c5c, 0x3c01a078, 0xbceaf723, 0xbca2f14b,
0x3c581308, 0xbc97943e, 0xbbd7efb4, 0x3c2cc478, 0xbcc7b300, 0xbc9c64c9,
0xbad26000, 0x3cd0a022, 0x3cc5f888, 0xbcf6a9f6, 0xbc80ab27, 0x3a2c1ac0,
0x3ce5b2e8, 0x3bcaf110, 0x3b49afb0, 0xbd008b8e, 0x3ce1c3e2, 0x3cb933f0,
0xbbc654f8, 0xbbd1dd70, 0x3ceed6e6, 0xbc92c338, 0xbbee5670, 0xbce82970,
0x3c177538, 0xbd001ef6, 0x3c45dd78, 0x3cc30bce, 0x3a241040, 0xbcd79ede,
0xbc52040a, 0xbcfe23bf, 0xbccc6e29, 0x3ba9a0e0, 0x3ce5cb08, 0xbbb8bcac,
0x3cc72efc, 0xbc9f1c18, 0x3bfc8e18, 0xbb7d54c0, 0x3ccdcc6e, 0xbb0f8178,
0x3d064b73, 0xbbc32110, 0xbc96b494, 0xbc9087e7, 0xbcff701d, 0xbb92b3b4,
0x3c28f054, 0xbc1d313c, 0x3ca2a5ae, 0x3b301940, 0xbce6244c, 0x3aa48600,
0x3cef975a, 0x3ce93b72, 0xbc7e7070, 0x3bee4e10, 0xbc990b2e, 0x3c024440,
0xbbfa03d8, 0xbcf3b856, 0x3cef0a4e, 0xb9fb7e80, 0xbc089470, 0xbcf27ed4,
0xbcf25630, 0xbcfb8b6c, 0xbce5cb61, 0xbcdd3307, 0x3b252f30, 0x3caad482,
0xbd02302e, 0xbcc9f17e, 0xbb714478, 0xbc7903b4, 0xbccb3ea5, 0x3c9e1696,
0x3bf65510, 0xbc1905b8, 0xbbb197bc, 0xbc48950e, 0x3a146f80, 0x3d0418d3,
0xbc86a367, 0x3cc492ba, 0x3c81e09c, 0xbc1cbb74, 0x3ca1fe6e, 0x3c30e93c,
0x3ce7f838, 0xbbc89108, 0x3cbb1f90, 0x3b89aa00, 0xbcf530ec, 0x3cb11ed8,
0x3c43df68, 0xbce19434, 0x3b9e2490, 0x3cf89f56, 0xbc849774, 0x3c9fc5d2,
0x3cd40aac, 0x3c7a737c, 0x3c7d2ea4, 0xbc6e32d2, 0xbcf60c58, 0xbceac681,
0x3b5cc6f0, 0xbcb24d05, 0xbc50ea44, 0x3c825ef2, 0xbc8e625e, 0x3b913ee0,
0x3cbabd98, 0x3c4cc200, 0xbc927f40, 0xbaa63d80, 0xbcfb9adf, 0xbb255d48,
0xbcb4b2be, 0xbc223dec, 0xbca329c0, 0x3996f300, 0xbc4b28a8, 0xbca2024e,
0x3bc93218, 0xbcfb261d, 0xbcc07b70, 0x3c5b7edc, 0x3c29eec0, 0xba877a60,
0x3bcaaaa8, 0xbc985c60, 0x3cdabce2, 0x3c05e470, 0xba12b580, 0xbace2ce0,
0x3cf43386, 0x3cc77856, 0x3cdac336, 0x3bcc9858, 0xbca85360, 0xbc7dfc74,
0xbca8f6f2, 0xbcebb929, 0x3c54ddf4, 0x3cef30be, 0x3d086381, 0xbcc61963,
0x3c4933a0, 0x3bd0e390, 0xbcd9286e, 0xbcf52a29, 0x3c41a090, 0xbbe75208,
0xbcfae2f0, 0xbb6de348, 0x3c371660, 0xbca5ee80, 0xbcbd551a, 0xbc87a0cf,
0x3d07b59b, 0x3d07c167, 0x3cf04702, 0xbcb3c79e, 0xbcd251d2, 0xbce30070,
0xbce0f9a9, 0x3bf6c1d8, 0xbd0731b5, 0x3d0370b5, 0xbcaa443a, 0xbc56cda8,
0x3cb7792e, 0xbc8f5829, 0xbcf93b5f, 0x3c839fac, 0xbcaae5d4, 0xbcc4e849,
0x3cb66a44, 0x3b199630, 0x3c468f30, 0x3c77b788, 0x3b168fb0, 0x3c64b68c,
0xbd06ecbd, 0x3b8c23c8, 0xbce2b2a3, 0x3c8dc6a0, 0x3cdd1d38, 0xbca4e910,
0xba55c9c0, 0xbcf4d532, 0x3c2cdfc8, 0x3ca1fbc4, 0x3c86ddc6, 0x3cec0c5a,
0xbb921d80, 0x3cca2cec, 0xbd01a764, 0x3c65cc38, 0xbd015bd0, 0xbbb9c09c,
0x3ce1ab58, 0x3cc7f21a, 0x3cbb4f7e, 0x3d083d27, 0x3baef860, 0xbc0828bc,
0xbcdd867c, 0x3c7b03c0, 0xbc644a06, 0x3c82be4e, 0x3b24bff0, 0xbbb0e154,
0xbcd2baa5, 0x3c0c5f34, 0x3cf15a7e, 0x3c38b8b4, 0xbbf001cc, 0x3ceda660,
0x3c007a10, 0x3b9db930, 0x3d064f59, 0x3c60ed70, 0xbc6bf90e, 0x3c5f42c4,
0xbcfb25e3, 0x3a1da400, 0x3ada6b40, 0xbbd3a34c, 0xbcc02c69, 0xbc91701c,
0xbce28e14, 0xbbdfb1bc, 0x3c7d7424, 0x3d011235, 0x3c93de26, 0xba18af80,
0xbb9be300, 0xbb4fd198, 0xbc1ab75a, 0x3d038375, 0x3c476580, 0x3c61dba0,
0xbaed0000, 0x391a4400, 0x3beba058, 0xbcc32eba, 0x3c35a408, 0x3c5e8790,
0xbcffa045, 0xbac834a0, 0xbcc64ee0, 0xbcca8e32, 0x3cee84e2, 0x3c44d5e0,
0xbc86f4c9, 0xbaf97260, 0xbc4accb4, 0x3cffb27e, 0xbcb03482, 0x3be7bc00,
0xbbff9e78, 0x3c218ce0, 0x3c97a8b4, 0xbbc192b4, 0x3c3c5bbc, 0x3c89a4e2,
0x3c7881e0, 0x3cfd65c2, 0x3ce6165e, 0xbcec7dae, 0x3cac24bc, 0x3ccaffde,
0x3c8b2324, 0xbd081384, 0x3c8e6bdc, 0xbbff50f8, 0xbc8a41fc, 0x3b812fc8,
0xbc95b9c5, 0xbc795b12, 0x3b0e21b0, 0x3cc139f2, 0x3ccdcd8a, 0x3c919a58,
0xbadf4040, 0x3d0388bb, 0x3c126440, 0x3c73270c, 0xbce2c5c1, 0xbcd90be1,
0x3ca06d9c, 0xbcd0e34c, 0xbcba1c89, 0x3cc59b90, 0xbc9a236b, 0xbc8a836b,
0xbcd48874, 0xbcb74023, 0x3c268c1c, 0xbc994032, 0x3c761ec0, 0xbc192b30,
0x3c9233c6, 0xbce39dd8, 0xbbced258, 0x3a8747e0, 0xbc4cb138, 0xbc35a752,
0xbc40e068, 0xbcf891fd, 0x3bf3ce18, 0xbc914e4e, 0xbc210292, 0xbc601c40,
0xbce2742c, 0xbc2e3b66, 0x3c23d840, 0xbb20b9c0, 0x3afc74c0, 0x3cb3033e,
0x3cc42b52, 0xbc3427a2, 0x3d07da63, 0xba00c9c0, 0x3cdb947c, 0x3cf3b55a,
0x3c9bbc24, 0xb94a5300, 0xbb8e6168, 0xbced349c, 0xbcd450c3, 0xbb34d8e0,
0xbc17c062, 0xbcb5627a, 0x3c63403c, 0xbb5d8630, 0xbcbbb84e, 0x3d08704b,
0x3c1b21d0, 0xbbecbbf0, 0xbcef113c, 0x3bcd7dd8, 0xbc363f2c, 0x3ca48530,
0x3d073acf, 0xbcb2fd52, 0xbba4513c, 0x3b33cc10, 0xbc11bc84, 0x3c98d7c6,
0x3ba25528, 0xbc456d0a, 0x3c807874, 0xbb66bd88, 0x3cf9298e, 0x3cfb2672,
0x3c2b2e3c, 0xbc465530, 0xbc7ffd8e, 0xba7fd240, 0xbb0b10c0, 0x3bb898f8,
0x3c69a6a8, 0x3b675ab0, 0xbc53e2ec, 0xbd0407e0, 0x3c3ff124, 0x3a8811c0,
0x3b33baf0, 0x3b805ec8, 0x3cc979d6, 0xbcc31bb2, 0x3c93e704, 0x3bd1c500,
0x3b2cfd80, 0x3c82f4ac, 0x3bf7a6e0, 0x3c477180, 0xbcd269e1, 0x3c4d62dc,
0x3cff0602, 0x3c53d124, 0xbd0157b5, 0x3c655cb0, 0xbcbed309, 0x3cc7e680,
0xbcc89f70, 0xbd01e10f, 0x3c8156c4, 0x3cc82ac0, 0x3cf3b95e, 0x3cee6b4c,
0x3c8451d0, 0x3cf4d9d6, 0x3d01d7d3, 0x3c12fc68, 0xba812800, 0xbbf9c700,
0x3ac36080, 0xbcfd4958, 0x3c8af64a, 0x3c9ce8f4, 0xbc9f2ac2, 0x392d5b00,
0x3c8e7cdc, 0x3caeed1e, 0xbca5ae47, 0xbcb10f92, 0xbc128774, 0x3c4e5084,
0xbb9ff568, 0x3bc59390, 0xbd03c71a, 0xbd05f08f, 0x3c3516fc, 0xbc11882c,
0x3cdaec22, 0xbd04e1b9, 0x3c8bbeee, 0xbca7fd69, 0xbb944388, 0x3cc9e5cc,
0xbcea46fa, 0x3c0981ec, 0x3cc1eac4, 0xbcd82e09, 0x3c2c7ff0, 0x3cc37514,
0xbc102a0a, 0x3ccab560, 0x3aab6800, 0x3c7fce90, 0xbc8970e0, 0xbc857b94,
0x3a25c480, 0xbceec3a1, 0x3b805da0, 0xbc5cbc28, 0x3c8e9366, 0xbcdc9a0e,
0x3acb6aa0, 0x3cebd676, 0x3cd21384, 0x3b0405d0, 0x3becd640, 0x3cf9c51a,
0x3cc07f88, 0x3b32f0a0, 0xbca56c38, 0xbcb835f8, 0x3be691e8, 0x3cb4a372,
0x3c35ccd0, 0xbb4813f0, 0xbca5f77e, 0xbc8eee78, 0xbbcc143c, 0xbce5f749,
0x3c90daec, 0xbca1a6e9, 0x3bf37240, 0x3cff7b2e, 0x3cf37c9e, 0x3ba8ab90,
0xbbe27768, 0x3be9b410, 0xbcff2392, 0xbc51adbc, 0xbaa99120, 0x3cd5f0bc,
0xbb80d9bc, 0x3ce34b16, 0x3c1c5950, 0x3cec9788, 0xbcebf9a7, 0xbb65bd98,
0x3d07f071, 0x3be2cb90, 0xbc3ae3ac, 0xbca740a7, 0xbb080260, 0xbc48507c,
0xbcc31098, 0x3c0e56d4, 0xbc65f9b0, 0xbc05f622, 0x3c97a35c, 0xbcd1b627,
0xbc8863a7, 0x3bf91fc8, 0xbc77b5da, 0xbcbecc3c, 0x3c29339c, 0xbc6124ac,
0x3d033551, 0xbccea7f0, 0x3cc14b62, 0xbceb1b27, 0xbb7bdf98, 0xbc437e1e,
0x3c2a67e0, 0x3cb5d8da, 0x3be8e108, 0xbcae5596, 0x3bda5500, 0x3c3e70b4,
0xbb56ae88, 0xbcaf602c, 0x3c2bc4f4, 0xbcd81a72, 0x3caa69fa, 0xbc5a8e46,
0xbcb7a360, 0xbc8b4f8f, 0xbcfc7add, 0xbc968b49, 0x3b96a450, 0xbb516348,
0x3c5b38f8, 0xbcf4b78e, 0x3d00afa9, 0xbb861934, 0xbc225f3c, 0xbc722068,
0xbc52d470, 0xbc81109c, 0xbcd19e69, 0x3cf15eee, 0x3b3f7f40, 0x3c86fde4,
0x3c841d52, 0xbc3fc934, 0xbb981878, 0x3ce9c266, 0xbd0757c2, 0xbc310934,
0xbbba682c, 0x3c226ff8, 0x3cc1aef8, 0xbc99dfba, 0xbc51c2da, 0xbc194f70,
0xbc94858b, 0xbc0be334, 0x3cc96dd4, 0x3cab7d3e, 0xbb4d4800, 0xbbcd1110,
0xbc9e8d5e, 0xbc79985a, 0x3ccba72e, 0xbc49e9de, 0x3aec0ca0, 0x3cfbde2a,
0x3cc54062, 0x3c5cc4f4, 0x3be1b058, 0xbc9d79f0, 0xbcce4198, 0x3c895c2e,
0xbd02ae7d, 0xbc41749a, 0xbcfa24b4, 0xbcd941a9, 0xbcfca8ae, 0xbcbbb854,
0xbc205552, 0x3d03416d, 0xbc2c3266, 0x3cd1be9c, 0xbc78dc8a, 0x3c0a7dbc,
0x3cdcba4e, 0x3c4f7d18, 0x3ccbe278, 0xbc964cd0, 0xb957cf00, 0x3c7a4ca0,
0x3c9bfd10, 0xbb29a4e0, 0xbab92800, 0xbbea93f0, 0x3c01c884, 0x3c4e1bfc,
0x3ca1405e, 0xbd04a8dd, 0x3c97623c, 0x3ce4e09a, 0xbbaca234, 0xbc4bb730,
0xbcd13356, 0x3c176238, 0x3cbcada2, 0xbc919b45, 0x3c620020, 0x3ca76d18,
0x3c81756c, 0x3cb5b014, 0x3b8cad50, 0xbcaf6732, 0xbc2bfbb8, 0xbb82979c,
0xbcd79ed8, 0xbba46e9c, 0xba3c0d80, 0x3bf32790, 0xbccd0363, 0xbcfeb2d0,
0xbc7db252, 0x3ce3bbb0, 0x3cf11c6e, 0xbb10a498, 0x3cdaf408, 0xbba954f8,
0xbbfac770, 0xbcb84c47, 0xbaa2cca0, 0x3c6ea924, 0x3c0871ac, 0xbc072378,
0xbc5c908a, 0x3c57b600, 0xbc216d8a, 0x3c2fea2c, 0x3ba4a400, 0xbd083ef3,
0x3d00cfb5, 0xbc657028, 0xbcc1a685, 0x3b54d820, 0xbc144db4, 0xbbbe6254,
0x3ca97a1a, 0x3cb42dc4, 0xbc99521c, 0x3cd05c10, 0x3c367044, 0xbcb84358,
0x3a2aeec0, 0xbd0451ab, 0xbcfd23d6, 0x3cd36c8a, 0xbcd8a67a, 0x3bf86e78,
0xbcf56c07, 0x3a3f4d80, 0x3bdcd350, 0xbcf53e67, 0x3d00f3cb, 0x3a984440,
0xbc4f9d8e, 0x3b366650, 0x3cb74bec, 0xbc8b6ecf, 0xbc8f8a54, 0x3c328ba0,
0xbb32cd68, 0x3ca9a316, 0xbc8a87e3, 0xbc1eb4f8, 0x3cdd0c96, 0x3c1b1080,
0xbcf105c9, 0x3c9712d4, 0x3c7c721c, 0xbc6e1122, 0x3c0f9f2c, 0x3be70530,
0xbcfc1e7f, 0x3b8a9d00, 0x3cfbe32a, 0x3cd39d64, 0xbc6bc71e, 0xbcba1256,
0xbc2a8b38, 0x3bc427a8, 0x3b356e60, 0xbc856ada, 0xbc98bf2e, 0x3c831ab8,
0xbc87afbe, 0x3b178930, 0x3bbbddb0, 0xbc69e9b8, 0xbcc361f8, 0xbc504e2c,
0xbcb9d2be, 0xbc9f1b30, 0x3b7bc740, 0xbbb9d234, 0x3cfd5696, 0xbcd3fec3,
0x3b7f5a90, 0xbb764668, 0xbc55cc22, 0x3c9cc034, 0x3c54e4e8, 0x3b32f0c0,
0x3b4eabe0, 0xbabc2e40, 0xb923a200, 0xbad81480, 0x3ca45c6c, 0x3c6c9f44,
0x3b5d6f20, 0x3cc5c13c, 0x3cbef83e, 0x3ca1ccae, 0x3c1fced4, 0xbb840c00,
0xbbeb5b2c, 0xbc5854ac, 0x3c3ed580, 0x3ca2d572, 0x3d001821, 0xbcfd058e,
0xbc9c31e7, 0xbcd0513e, 0x3cbf8fd6, 0x3c141c34, 0x3cda3850, 0xbc99e2d0,
0x3d0631af, 0x3bf1ece8, 0x3c4b2df4, 0x3cb62e62, 0x3b43c0d0, 0x3c2bc42c,
0xbc05da80, 0x3cdc6088, 0xbce1ea56, 0xbce4b754, 0x3d0325b5, 0xbc1d286c,
0x3d03210f, 0x3cd20ef8, 0xbb98a0e8, 0xbc724886, 0x3990fe00, 0xbca9d589,
0xbd061335, 0x3ce563fc, 0xbcf42150, 0xbd06d273, 0xbc1a1d2c, 0xbba770a4,
0x3ab20820, 0x3c11c7ec, 0xbc4aab00, 0xbcc23ae3, 0x3c18e3e8, 0x3cba4b1e,
0xbcd0ad32, 0xbc82fab8, 0x3cf3a5a2, 0xbba4bfbc, 0xbc53dea2, 0xbb60a568,
0xbb9544a4, 0x3c4482a4, 0xbd022ff9, 0xbc70062c, 0x3bc5c6f8, 0x3c70e28c,
0xbc8fb583, 0xbcd33961, 0xba58d840, 0xbbb95e08, 0x3c781470, 0x3ca178c6,
0xbc4cc830, 0x3c5fae0c, 0xbc70129e, 0xbc56003c, 0x3cc643e8, 0xbc0f8cc4,
0xbc61bb06, 0x3b4bdf10, 0x3c5f44f0, 0xbc9ab350, 0x3c3f0adc, 0xbcb88bbc,
0xbb687d30, 0xbc8b0087, 0xb9c57500, 0x3c3d2f74, 0xbac66460, 0xbc1a1ace,
0x3c3e85bc, 0x3adb54e0, 0x3c88a206, 0xbce59fc5, 0x3c1d3fdc, 0xbc8e51b6,
0xbc08c38a, 0xbc9c1c36, 0x3ce2ee86, 0x3b8a11f0, 0xbc41dcec, 0x3caf8bf4,
0xbce48701, 0xbcab3300, 0xbb89681c, 0x3cb0231a, 0xbba1bdf0, 0xbc9cd35a,
0xbcf7a6b4, 0xbc4b7e4a, 0xbcfc3da3, 0xbce375ac, 0xbc0cad5a, 0x3bd99a88,
0xbd010fa3, 0xbc8e4f45, 0x3cf2f416, 0x39bdfb00, 0xbcf61814, 0xbc9dadd6,
0xbcec76c7, 0x3b63d250, 0xbc3d5a68, 0xbca98b4e, 0x3c532870, 0xbc891160,
0xbb82ab78, 0x3d05d893, 0xbcb4aafa, 0x3cabd366, 0x3cadef44, 0x3cf8e1e2,
0xbcf69896, 0xbc6b242c, 0x3c83ab98, 0xbcd96dc9, 0xbc5cf51a, 0x3c99d3d4,
0x3beb7cd8, 0x3ce729d2, 0xbc3bc430, 0x3a378d80, 0xbc3b44d6, 0x3c5909d8,
0x3cff20b6, 0xbcfebc2e, 0xbce4d2ba, 0xb9f32300, 0x3b9a8ea0, 0x3c85d8f4,
0xbd00121b, 0x3c7c43a8, 0x3cee57ca, 0xbc0c51ce, 0xbd0361fa, 0x3d0490b3,
0x3c64c36c, 0xbcad1fac, 0xbb1534e0, 0xbc844a6f, 0xbcae8670, 0xbaa58120,
0x3c15d028, 0xbb641f20, 0xbbdfe934, 0x3b0e7820, 0xbcff2025, 0x3cf6812a,
0xbcc6873e, 0x3cad7ca4, 0x3ceb95dc, 0xbcd5c158, 0x3cf7feba, 0x3c55b5c0,
0xbccfc980, 0x3cffb27a, 0x3b741d10, 0xbce142dc, 0x3d05e58b, 0xba3f8d80,
0xbd008da3, 0x3ce25cf4, 0xbb0e5ce0, 0xbbb0b5e8, 0x3ceee2e8, 0x3d057d6f,
0x3beed588, 0x3cc1d5b6, 0xbcf7c752, 0xbcb96b7a, 0x3cf6a0aa, 0xbc60a652,
0xbcb54eb4, 0x3c9f3698, 0xbce42ff2, 0xb8ad1600, 0xbcdf26e5, 0x3cf2c1a6,
0x3bcf89a0, 0xbbc7d5cc, 0x3c240b9c, 0xbcb2f600, 0x3bef1888, 0xbcd39687,
0x3cf4c39e, 0x3c29e8b4, 0x3cb3df62, 0xbc68a530, 0x3c05442c, 0x3cd53d1a,
0x3cc7c256, 0xbcdc8a2e, 0x3ce5b7b6, 0xbbe30b78, 0xbc8bddde, 0xbd048c9e,
0x3ab8d040, 0xbcf1b270, 0xbca48fe2, 0x3c48db6c, 0x3c61e860, 0x3c2b1e8c,
0xbcbe2dd4, 0xbcbaa53c, 0x3c1fdfd0, 0xbcf1ebd8, 0xbca668b6, 0xbb8c7100,
0xbc04cc84, 0x3ca11246, 0xbc83da2b, 0xbc34d780, 0x3cc96580, 0x3bf0f480,
0xbccb4ec0, 0xba9cfba0, 0x3ca1e24c, 0x3c5659ac, 0xbb0f6b30, 0xbcc21663,
0xbb912b54, 0x3c90beea, 0x3c4f32f8, 0x3b765a30, 0xbcfc58e7, 0xbc318ac4,
0xbd0644a7, 0x3c937c86, 0x3afb3fc0, 0x3cb73b0c, 0xb7c26000, 0xbb699530,
0xbcf043b2, 0xbc2215a2, 0xbc9e713a, 0xbbabffcc, 0x3d075a2f, 0xbd005a1d,
0x3c1f5124, 0xbc8ecbde, 0x3bc09798, 0x3cc5bb80, 0xbbd6e768, 0xbd014cd0,
0x3c4f5c80, 0x3cb3800e, 0xbcf17241, 0x3c9a5b24, 0x3d061bdd, 0x3ba66960,
0x3c39eedc, 0xbbb5efd4, 0xbc67a0ac, 0xbcabff78, 0x3ce71656, 0xbbdb2508,
0x3ce9e56c, 0x3cbdb776, 0x3ab1cda0, 0xbd031962, 0x3c88b13e, 0xbcbd6a98,
0x3c52e68c, 0xbcc5541c, 0x3c58af70, 0xbc8aa10d, 0xbaa88bc0, 0x3cf1b37e,
0x3cefde26, 0x3ceef896, 0xbbab2624, 0x3cbec2be, 0xbcdb6b34, 0x3c89d472,
0xbc5ab87c, 0x3cee7aae, 0x3c071668, 0x3cb1fee4, 0xbbbaf770, 0xbbb33ccc,
0xbcfa02dd, 0x3b88b5b0, 0xbbacec08, 0x3c4c7078, 0x3a533940, 0xbb93bd88,
0x3cdc79a8, 0x3d026b9b, 0xbc58cfca, 0xbd027e47, 0x39b43200, 0x3bb25e20,
0x3cb83290, 0x3caa280c, 0x3d08510b, 0x3c333000, 0xbc9b5b72, 0x3c90a6e6,
0x3c769bd0, 0x3c8f6ee0, 0xbc127ebc, 0xbce680f8, 0xbc067a3c, 0xbcda0f2e,
0xbc13b496, 0xbc3ae8b0, 0x3ca9727c, 0x3aeed180, 0x3c3204d0, 0x3cfc8cca,
0x3cc53674, 0xbcb47360, 0x3c5a9fdc, 0xbc691774, 0x3cab724a, 0x3ca55d6a,
0x3ca0cbfa, 0x3c397c24, 0xbc14075a, 0x3c1a7448, 0xbc73c378, 0xbca40365,
0xbcceed76, 0x3c83111e, 0x3c16b72c, 0x3b410060, 0xbd01924a, 0xbcaaf102,
0xbc59517c, 0xbc565b9a, 0xbcee76c5, 0x3c4747a8, 0xbc647d56, 0x3c9787ec,
0xbb72fea8, 0xbc4e057c, 0x39ed4500, 0x3cee9ac2, 0xbc0596e2, 0xb9f84800,
0xbcfb4f09, 0xbc4dcb68, 0xbcc20765, 0x3c9267b8, 0x3c69add0, 0xbbe132f0,
0x3b33e8c0, 0xbcd84309, 0x3c3adaf4, 0x3be8a278, 0xbc2a9744, 0x3b3003a0,
0x3c26b764, 0x3a7bbac0, 0x3bb76430, 0xbc584b92, 0x3b4c1390, 0x3ce6161e,
0x3ba96ba8, 0x3ce31d1c, 0xbc374df8, 0xbc731596, 0xb9437c00, 0x3cd9500a,
0xbcb266d2, 0x3c9e60ac, 0xbcc69550, 0x3ce4606e, 0xbc45f5c0, 0x3c96f756,
0x3c94550a, 0x3c53ef88, 0x3bf1d488, 0xbc46b02c, 0x3ca98d24, 0x3cf092a6,
0x3c6d5b20, 0x3c2cd21c, 0xbc4c64c4, 0x3c3010ec, 0x3bf333f8, 0x3c7e2550,
0x3ce84bb6, 0xbc95c87a, 0x3cefc47e, 0xbb963a34, 0x3bdfa580, 0x3c8f7bce,
0xbc682d4e, 0xbcfc9045, 0xbbc1b02c, 0xbb6739e0, 0x3bbc8bc8, 0x3bfe00c0,
0x3ca10ff4, 0x3bfbbf90, 0xbd076589, 0x3cc5eb02, 0xbc1d3b1a, 0x3c10a760,
0x3c5d67fc, 0xbbe5159c, 0x3cf10b0e, 0x3cc769a4, 0x3cd60c02, 0x3cc997c2,
0xbc8311f8, 0xbcf171e1, 0x3c4f4ba0, 0xbb8aaa3c, 0xbc326400, 0xbcf261d4,
0xbcd6541a, 0xbd07bbe6, 0x3b6e3cd0, 0x3b0a9030, 0x3ce36768, 0xbccdda09,
0xbcfdc752, 0xbd054829, 0x3a141cc0, 0x3c0c6b48, 0xbb949af0, 0xbc51638e,
0xbd07759c, 0x3cba70c0, 0x3cd7013c, 0xbbc33288, 0x3cef3a36, 0xbc627328,
0x3c918272, 0x3cc07d32, 0xbbb1064c, 0xbceea610, 0x3b189e50, 0x3cf292ca,
0xbcad085c, 0x3c511718, 0xbcf6b363, 0xbc8c3ada, 0xbac24260, 0x3cd05774,
0x3abc26a0, 0xbc693b56, 0x3cf0cd5a, 0x3bc29f90, 0xbc9beef4, 0x3cb18942,
0x3cd37d4c, 0x3c3dba5c, 0x3cf0106a, 0x3c99f240, 0x3c701eec, 0x3ccb8c6c,
0xbc579bb8, 0x3cde0450, 0x3cbf11bc, 0xbbe146ac, 0xbc2a5922, 0xbc3b8f8e,
0x3ce13fe2, 0xbc82a743, 0x3c76e5b0, 0xbcd67aac, 0xbbcda03c, 0x3c801036,
0x3bbf57f0, 0x39884100, 0xbcb5169e, 0xbcca37ba, 0x3c716cf0, 0xbc05b844,
0x3cd7b4fe, 0xbc28fdf0, 0xbca14a74, 0xbc1ba43c, 0xbc520640, 0x3d031b4b,
0xbcf8b3f6, 0x3ca639a6, 0xbcc63a49, 0x3caceaea, 0x3cc0a818, 0xbc124816,
0xbcfe1963, 0x3cb51a1a, 0x3ccfeb04, 0xbc65d6a8, 0x3a98a3a0, 0x3ce07b8e,
0x3ca3c49e, 0xbce333ae, 0xbc84e6be, 0xbc81e5d4, 0x3d087a8d, 0xbce9af4c,
0x3cccf4f2, 0xbc902272, 0xbcfc35ff, 0xbc9bee87, 0x3c9b95e6, 0xbb606558,
0x3cc2680c, 0xbbd6f708, 0x3cfe99e6, 0x3b575d40, 0x3ccd789e, 0x3c957b14,
0xbc7f4ff8, 0x3c99c940, 0xbb9b4310, 0x3c9cfc38, 0x3baf8ca8, 0xbc7d2b9a,
0x3bcc0388, 0x3cb47954, 0x3c9d434c, 0xbb5d5088, 0x3b830b78, 0xbc80bbe5,
0x3cf821c2, 0x3ca7ca72, 0xbcb63518, 0xbb06adc0, 0xbc4c0184, 0x3be5da30,
0xbcc41a1c, 0xbc23dea6, 0xbd029940, 0xbc9c843e, 0xbc9d29a0, 0x3ccd5438,
0x3cedfcec, 0x3cc17af8, 0x3cc77452, 0x3bb6ec48, 0xbc2d0d96, 0x3cafe94e,
0xbb8c751c, 0xbcec9fce, 0x3cc950c2, 0xbc4d007c, 0xbca16ada, 0xbbd06b24,
0xbc42afb0, 0x3be33c68, 0x3b45c1e0, 0x3c7652e0, 0xbc927a7e, 0x3b1e6ce0,
0x3caedb76, 0x3bcf77b0, 0xbcb5789e, 0x3cdeb7e6, 0xbc8ebc92, 0x3c1a7ad0,
0x3cdcb63a, 0x3c7faccc, 0x3d0699c7, 0x3c217944, 0x3be637f0, 0x3bacb788,
0x3c870712, 0xbd040b4a, 0xbb1eb6e0, 0x3cc4eb4e, 0xbc947985, 0xbca65ce5,
0xbc38e6ac, 0x3cf7a40e, 0x3bec0ba8, 0x3c8b3542, 0xba5ac280, 0x3ce15de2,
0x3cf57a0e, 0xbce80854, 0x3c6ef8b4, 0xbb7b8310, 0x3ce9e72e, 0xbc41c86c,
0x3c65a2bc, 0xbca8c174, 0x3c8dedb4, 0xbcf6252e, 0xbc66e10e, 0x3cab48b2,
0x3c5e095c, 0x3c7e8aa0, 0x3cd19b9c, 0x3ac46ae0, 0xbc7ac2de, 0xbc21edf8,
0xbc909280, 0xbbefeb3c, 0xbc8e3a69, 0xbc1f7984, 0xbb4b6678, 0x3be1bca0,
0x3c35876c, 0x3c48161c, 0x3cf930a2, 0xbcdc4d34, 0xbce476bf, 0xbbd82808,
0xbcb32905, 0x3c3b3928, 0x3c92022c, 0x3c8a843e, 0xbc29f3e2, 0xbce40be3,
0xbcb7ddae, 0x3c9f751c, 0x3c8fa44c, 0x3cb4c8be, 0x3cbeb818, 0x3a9c7160,
0x3c965892, 0x3bc1efe8, 0xbb9d37e0, 0x3ce3048e, 0x3c00c710, 0xbcea4f90,
0xbca42307, 0xbd0511e6, 0xbbd287ac, 0xbca2cb8e, 0x3cb39332, 0xbd0648a0,
0x3c2bd55c, 0x3bb01130, 0x3b341990, 0xbd05ab39, 0x3b9ce9e8, 0x3ba790c8,
0xbb7fbb58, 0x3c64f2b4, 0xbb654188, 0x3cfe6f6e, 0x3ce35eb6, 0xbcde0238,
0xbc19c252, 0xbcda5d05, 0xbca0b232, 0x3cbdde16, 0x3cfbbeb6, 0xbcbb4e80,
0xbcbf95be, 0x39afd500, 0x3c89411a, 0xbcc2a912, 0xbc04b97c, 0xbbecbe60,
0xbcaa17f0, 0x3d07ec8f, 0x3c7f21b4, 0x3cea800c, 0xbc8728b6, 0x3cec756c,
0xbc9976fa, 0x3bb3cc78, 0xbca3d17c, 0xbc625056, 0xbc99576e, 0x3b0c6000,
0x3c31e3e0, 0x3cf2ecc2, 0xbd07c5c9, 0x3c8db84e, 0x3cfb7a6a, 0xbcba9927,
0xbbf6c808, 0xbc880f58, 0x3caf62fc, 0xbcabf752, 0xbc7a409e, 0xbcfcae4c,
0x3c559ad8, 0xbcb1e827, 0x3cf59b5e, 0xbcfbec1d, 0x3c495370, 0xbc895303,
0x3cca674c, 0xbb833500, 0x3c5cbe30, 0x3c0525a4, 0x3d07908f, 0x3cf26032,
0x3c12350c, 0x3a7728c0, 0x3ca2ef34, 0xbcc36b3e, 0x3c66bf50, 0xbcdbd674,
0xbc30c4a2, 0x3ccdf0e4, 0xbcba54ae, 0x3c7783c4, 0xbc756bb4, 0x3c35c734,
0xbd0082fe, 0xbbe8eabc, 0x3c8c534e, 0x3ca7e76c, 0xbc943814, 0xbce8dab2,
0xbbf1a94c, 0xbcf752f2, 0xbaf0d860, 0x3ce16e08, 0xbce67374, 0xbcb21812,
0x3d0856bd, 0x3cd63bae, 0xbcf4424e, 0xbd032756, 0xbc9b20e7, 0xbb29a300,
0xbc410c12, 0x3ba1b2a0, 0xbad8b9a0, 0xbcb7e769, 0xbcb4cef2, 0xbba5801c,
0x3bd98f00, 0x3ceb023c, 0xbc9e5f0b, 0x3b7a2040, 0xbce05b5a, 0x3cd893fc,
0x3cb2dd1e, 0xbcecf863, 0x3c5a0464, 0x3c6fec38, 0xbbcb5268, 0x3c356314,
0x3cf3658a, 0xbc222b80, 0x3c6f9afc, 0x3c173990, 0x3c177e50, 0x3c8ba066,
0xbcc6d7d0, 0x3c77710c, 0xbccc4b96, 0xbb2b2068, 0xbb2e7e10, 0x3c2e8224,
0xba3db0c0, 0xbce7baee, 0xbc3856c4, 0xbd01b3c9, 0xbcb100e2, 0x3c6930f8,
0x3c20e47c, 0x3c9fbaf6, 0x3cd6647e, 0x3bb56368, 0xbcda525c, 0x3ce10a0a,
0xbc1ddbf8, 0x3c51683c, 0x3d060601, 0x3c9ad062, 0x3cd984f2, 0x3cfbbdc6,
0x3bde9258, 0x3b8f5c40, 0xbd04871d, 0x3aff46e0, 0xbc02d9fc, 0x3abc5b60,
0x3cef1636, 0x3ccd2cee, 0xbb7329e0, 0xbc84e5da, 0x3c7c35a0, 0x3cb35d8a,
0x3cd3a1f4, 0x3ce64e9a, 0xbc380740, 0xbcf60a67, 0x3c808e92, 0xbc87e1e9,
0xbcf06450, 0x3cbc3f52, 0x3c9c1d54, 0xbcbac820, 0x3aac32a0, 0x3d0001bf,
0x3c4997cc, 0x3c2571ec, 0xbc902d7c, 0xbbf935e8, 0xbcc7c245, 0xbccd7234,
0xbcc67900, 0xbc99b096, 0xbc3ca092, 0x3c2e42a8, 0x3cb7fa88, 0x3c88c8ea,
0x3c1c8644, 0xbca3c64e, 0xbd072028, 0xbca1fc12, 0x3c92bf5a, 0xbd012dc2,
0x3b676500, 0xbc5a7ab0, 0x3ce5dcd4, 0xbbb3acf8, 0xba726b40, 0x3ce12880,
0x3b897a08, 0x3c3a72f4, 0xbc5ab9e8, 0x3b3248e0, 0xbcd90201, 0x3bdb89e0,
0x3bdb8478, 0xbcd3d9ce, 0x3b3d4f00, 0x3b4f6530, 0xbb275de0, 0x3c6d0078,
0x3cdb8ba2, 0x3c3e2bd0, 0xbcdd07ce, 0xbc68dbb0, 0xbc20a6ec, 0xbb506fe0,
0x3c0b0dd4, 0x3c9bd2f2, 0xbc564c4e, 0x3b43d8b0, 0xbb3cbd00, 0x3b7c7ef0,
0x3bc4e7d0, 0x3ab0a1c0, 0xbc668d6c, 0x3c8b3f06, 0xbc880d2b, 0xba83bac0,
0x3bcbd9c0, 0x3cde0f18, 0xbbdfac68, 0x3c637e04, 0xbc96cc89, 0xbcfd880e,
0x3cddc174, 0x3cee9530, 0x3c813408, 0x3c4590b4, 0xb9962000, 0x3c11daa0,
0x3c9fd4be, 0xbcea5c09, 0x3836c400, 0x3bd2de68, 0x3b877b78, 0xbd02aeb9,
0xbcaf2c30, 0x3d05996f, 0xbc08af56, 0xbbe46e90, 0xbbfb8370, 0xbce53234,
0xbc93e20e, 0x3be51740, 0xbc90ac34, 0xbcf8e2ee, 0x3c87d5b6, 0xbc9ef13e,
0x3cbc9f8e, 0xbc7e5b4e, 0xbbf17aa4, 0xbc97f374, 0xbb6d7b58, 0xbb8b8cf8,
0xbc1b6d40, 0x3cfcde36, 0xbcb75f76, 0x3ca94bb6, 0xbb88fd44, 0xbc8c2069,
0xbcbc8194, 0xbb0c0920, 0x3b9af440, 0xbc8f0820, 0x3b1a32b0, 0x3c66eb1c,
0xbcae0554, 0x3bba47e8, 0xbce5f709, 0x3cbe3f94, 0xbc40c856, 0x3cb44652,
0x3c554058, 0x3ce3e866, 0x3cf63efe, 0xbcdcbb9e, 0x3cd34ae8, 0x3c67f5b4,
0xbca49229, 0xb8b72800, 0x3b795730, 0xba269680, 0xbb0a52f0, 0x3a1e5b40,
0x3cac9cb0, 0x3d05c0cd, 0x3b5b8a60, 0x3cb55ea8, 0x3d03d343, 0x3cccd850,
0xbb00aa30, 0x3c834fba, 0xbc1caef4, 0x3bca8860, 0x3cada96a, 0x3bec4c00,
0xbd01bcdb, 0xbc39132c, 0x3c624468, 0xbcb6d45a, 0x3cb188b2, 0x3be5a708,
0xbcdb135e, 0xbcc60e3e, 0xbc7c659e, 0xbce52421, 0x3c3ac530, 0x3c2f57ec,
0xbc4a0d2c, 0xbcb957a0, 0x3be8b768, 0xbbde881c, 0x3b112800, 0x3c6750d0,
0x3c411de4, 0x3cd5e450, 0xbc9acca2, 0xbaeddc80, 0xbb67e758, 0x3cf3c0f6,
0x3d05b2a3, 0xbc78e5f0, 0xbc86b3c3, 0x3c268c20, 0x3c25ff7c, 0xbc980ceb,
0xbbf54c70, 0x3d00f9e7, 0x3c8aebb0, 0x3c53a918, 0x3c2023d0, 0x3b67ec90,
0xbc27bcf4, 0xbb6e2138, 0xba7412c0, 0xbb703f68, 0x3c071b14, 0xba141180,
0x3cca355e, 0xbd0719d7, 0x3cef542e, 0x3c2769fc, 0xbd02ed8f, 0x3ceff102,
0xbc49d9ac, 0x3c201944, 0x3a8797e0, 0x3bb35010, 0x3c61fb10, 0xbb410cd0,
0xbca7526e, 0x3aff70c0, 0x3cfabada, 0x3c6966a4, 0x3c21ce28, 0x3c8139c0,
0xbc88b823, 0xbcf53ee1, 0xbce3d583, 0x3cbc9ab4, 0x3cdb5076, 0x3bad9208,
0x3b8754c8, 0x3c1f79a4, 0x3c719390, 0xbc006762, 0x3cf31bbe, 0x3b4cb160,
0xbca374a0, 0xbcbc0e5e, 0xbcfc6b8e, 0xbccdcd1e, 0x3cc65ba8, 0x3b04b310,
0x3ccb4b2e, 0xbcbf553c, 0xbcc3ef94, 0x3bb47060, 0xbccf8e09, 0x3ab70140,
0x3bddcdd0, 0x3cc88b4a, 0x3c8ac2e0, 0x3d049ebf, 0xb9e41e80, 0x3c88d992,
0x3ca17420, 0xbc9e413c, 0xb95b4f00, 0x3c918796, 0xbb9e2a78, 0xbcd912a9,
0xbcb3ae74, 0xbc7c4dc6, 0xba96bc80, 0x3c9f1104, 0x3c2418c4, 0x3c42f3a8,
0xbcc5c938, 0xbc2270de, 0xbc3923ca, 0x3aab76e0, 0x3c2aab04, 0xbcf740a1,
0x3b775bf0, 0x3c928e92, 0xbc27098a, 0xb90e6000, 0x3c7625e8, 0x3b5eb9b0,
0x3b9afda0, 0x3cf5c6ae, 0x3c15adbc, 0xbc99da40, 0x3b7aa610, 0x3c8a9aea,
0xb9eec880, 0x3c75f54c, 0x3c41dcf0, 0xbc97cca7, 0xbb1be0e0, 0x3cc6812c,
0xbcd90949, 0x3c6238fc, 0x3c8fc6a4, 0xbc68ac5a, 0xbc7c2c1a, 0x3a26b840,
0x3ca012b2, 0xbca76638, 0x3cbeeaf8, 0xbc9cd3b8, 0xba8667e0, 0xbc86ab8d,
0x3c8c9b76, 0x3bf1c4e8, 0x3bbff6b0, 0x3c600800, 0x3c99e3aa, 0x3c890a4a,
0x3d04969f, 0xbd04baf5, 0x3cd59b58, 0xbcab04d2, 0xbd029f20, 0xbbdf3eac,
0xbc9ac0b6, 0x3acd0780, 0xbc2711c4, 0x3cd3f5fe, 0x3bb13e68, 0xbb692b48,
0xbc1d010a, 0x3ceed050, 0xba1fb980, 0x3c9b9f40, 0xbc46bfa8, 0xbd0237b6,
0xbce5c576, 0x3c89e490, 0xbc6e70f0, 0xbceb481a, 0xbca1088e, 0x3bcf9718,
0x3c85c988, 0x3c8f38a4, 0xbc837a8b, 0xbd084977, 0xbcc45a25, 0x3c26d850,
0x3d080d2f, 0x38ea2600, 0xbb8b5e60, 0xbb610a30, 0x3ca34eee, 0x3cbced10,
0xbc728ff0, 0x3c323508, 0x3c772628, 0x39261600, 0x3bcf35d0, 0x3ceff45a,
0x3be2bcc0, 0xbacc4800, 0x3cc3b5e8, 0x3cee2ab8, 0xbc9d042e, 0x3d0487a7,
0xbc8b69c0, 0x3cc214ee, 0x3c582cd0, 0x3c09bed0, 0x3c355288, 0x3d07cb3d,
0xba28efc0, 0x38e55800, 0xbc9eed30, 0x3cc53d4a, 0x3ccee898, 0xbc8325d6,
0xbcb64d29, 0xbcfb1b89, 0xbcc3eeb6, 0xbb63f8d0, 0x3b5d0920, 0x3c7f3260,
0xbc7eded6, 0x3b92da00, 0xbcd09772, 0xbc1aaeca, 0x3b887af8, 0x3b30f9c0,
0xbc1271de, 0x3c7bf480, 0xbbacc7c4, 0x3bf94050, 0x39eef700, 0x3cfe6be2,
0x3ba6fd48, 0x3cf0c08a, 0x3c864ec4, 0x3c3d8adc, 0x3c39ec04, 0x3c559018,
0xbcf57d98, 0xbb3b4998, 0xbc8bebe5, 0x3ce85a80, 0x3cd94612, 0xbc9c54eb,
0x3c863dd0, 0xbb1b4258, 0x3c520734, 0x3ca4d7f6, 0x3c23a534, 0xbcfd1249,
0x3c136ff8, 0x3c363e40, 0xbc0f48ac, 0x3cacbb8c, 0x3d002fe9, 0x3c3188b0,
0x3ca883a6, 0xbcecd078, 0xba64b400, 0x3cba0cd8, 0x3c2d21a4, 0xbcc0e6f2,
0x3cd5764c, 0xbbaa6ba4, 0x3a229640, 0xbc6aceda, 0x3c2807fc, 0xbc0424d2,
0xbc7c0b7c, 0x3d03326b, 0x3c9c079e, 0x3d0692e9, 0xbc815bed, 0x3bcbd930,
0x3ce50d8c, 0x3b92e060, 0x3b7653b0, 0x3b54a100, 0xbb925abc, 0xbcae2f30,
0xbc49f204, 0xbd041cb9, 0x3c25398c, 0x3c02e064, 0xbbb6a7b4, 0xbcd2ab4c,
0xbb3b6810, 0xbd062bf0, 0x3c831b96, 0x3cb250f4, 0xbace1f20, 0xbd063cb8,
0xbd04f07d, 0xbc5c98ca, 0x3cd69f0c, 0x3cab93d8, 0xbca46390, 0x3ae56760,
0x3c82fb20, 0xbb7d0920, 0xbb598410, 0x3ced847c, 0xbcf9a16c, 0xbbf6d2a4,
0xbb5fc100, 0xbbf5ec70, 0xbc289f9e, 0x3ccbd2b4, 0x3cb125fc, 0xbcc87a74,
0xbcad2560, 0x3ae0a400, 0xbc46ea3c, 0x3bdbeb90, 0x3ccab62c, 0x3d085bd7,
0x3c8defee, 0x3a4ab140, 0x3bae9858, 0x3cac64ac, 0xbc0f6640, 0x3c48f0f8,
0x3b58d8c0, 0xbca89036, 0xbae91ca0, 0xbca5905c, 0x3d021283, 0x3cc9d1bc,
0xbb641230, 0x3cb019ac, 0x3c3106f8, 0xbba6462c, 0xbcb789f6, 0x3b0e20c0,
0xbd028913, 0xbc54c412, 0xbcc32ef2, 0xbc24a00e, 0xbc842a83, 0xbc560b70,
0xbc80f1f4, 0xbcd5a765, 0xbce8c83a, 0x3cd8b576, 0x3d0865c3, 0xbc89f578,
0xbb8213e8, 0x3c7a3ef0, 0x3c9a6d98, 0x3ccfd996, 0x3d0845b9, 0xbc81acfa,
0xbc2001d2, 0x3ce472ec, 0x3cb135c2, 0x3b0551d0, 0x3cc4c242, 0x3cd884d6,
0x3b3b1850, 0x3c7c5150, 0xbc7517ca, 0xbc21301a, 0x3c7bc704, 0x3d08556f,
0x3bb73d70, 0xbb211778, 0x3cac175e, 0x3c6b1df0, 0xbd04d460, 0x3cd28a76,
0xbc868687, 0x399dd180, 0xbc807a20, 0x3ac527c0, 0xbc811eba, 0xbcb3afc7,
0x3cfc0b46, 0x3bb40008, 0xbc7a8480, 0x3cfa7bee, 0x3ce051c2, 0xbcc7c13c,
0x3cc39354, 0xbccf8f92, 0x3cfec29a, 0x3ae72640, 0x3c913058, 0xbcbfbdd6,
0xbccf56ba, 0xbc0d2c34, 0x3c181fd4, 0x3cfedab2, 0x3c32d850, 0xbc686362,
0xba947f20, 0x3cf212ea, 0x3ce684ac, 0xbc8232b6, 0xbc611812, 0xbb1a0558,
0x3cde538c, 0xbca671d6, 0x3c6983b0, 0x3b982af0, 0x3cd71bbe, 0x3ce264dc,
0xbccc30b0, 0x3d07925b, 0xbb888cd4, 0x3c1a6750, 0xbc925e05, 0x39cde380,
0xbcca592c, 0x3c993e04, 0xbbd516e0, 0xbcd39436, 0x3c3f6948, 0xbcb53bde,
0x3c217a80, 0xbc734b9e, 0x3bc88528, 0xbd001508, 0x3c6ddefc, 0x3cc3da62,
0x3c8492e0, 0x3baa9d08, 0xbcd78e4e, 0x3ccdddf6, 0xbd076d9f, 0xbca14969,
0xbc89477c, 0xbc7c7d5e, 0xbb3e9b78, 0x3cba6fd8, 0xba1ed040, 0xbc9f92a2,
0xbcdc14b8, 0xbc12d940, 0xbca06225, 0xbb73b498, 0xbcf71387, 0xbca9c7e7,
0xbc401bde, 0x3d027ebd, 0xbce5ac21, 0x3bb2dd60, 0x39b74e80, 0x3cac36a6,
0x3cee8dc6, 0x3ca42f80, 0xbca80fb8, 0xbca1a9ce, 0x3cd02acc, 0x3cff8f36,
0xbcb035a7, 0xbc86df36, 0x3c895d9e, 0xbb010d50, 0x3c3fbe24, 0xbca37372,
0xbbd679f8, 0x3c0e3900, 0xbcded5f8, 0xbc362b28, 0x3cfb7202, 0xbc224740,
0x3c6f4b60, 0x3b249ea0, 0x3ce1555a, 0xbc5efff8, 0xbc72b468, 0xbc0aef2c,
0x3cc4659a, 0xbaad4380, 0xbcc72a7e, 0x3b05b830, 0xbc906625, 0x3d03b25f,
0x3c8675d2, 0xbb020940, 0x3b870b18, 0x3c6ea2ec, 0xbc839ed2, 0xbc95022b,
0x3cdd0efe, 0x3c8e6b94, 0x3c6b2ae4, 0xbd01e45a, 0xbcd3c912, 0xbc4f0b16,
0xbcfbd26c, 0x3d012885, 0x3cf7cc6a, 0xbcc5f110, 0x3cfb654a, 0xbceb9478,
0xbd066a7b, 0x3cb263ce, 0xbd075302, 0xbca2b105, 0xbc8ae6b4, 0x3cdc000c,
0x3c99a3ac, 0x3c66e8ac, 0x3b73a580, 0x3c6f9658, 0x3cb3f92c, 0xbb1ac588,
0x3ae51580, 0x3c455478, 0x3b72a730, 0x3cc69b20, 0x3cf0bd3e, 0x3c2fc1b8,
0x3c881b76, 0x3c228438, 0x3c8202d8, 0xbc8f8cb4, 0xbc19790e, 0xbce8c5a9,
0xbc9bb7fa, 0xbd042ab2, 0xbc09cada, 0x3ceff31e, 0xbcc53527, 0xbc20125e,
0x3cfb50e6, 0xbc8a1f1c, 0xbcf064d2, 0x3c0a9a44, 0x3c2ffb74, 0x3d06df61,
0x3c85cb60, 0x3b0f1cf0, 0xbca1daa5, 0xbc626fc0, 0xbcf89569, 0x3ca82902,
0x3cfa151a, 0x3c4d1cb8, 0x3c95953e, 0x3c75e4c0, 0xbc96a1a5, 0xbcadb9e9,
0xbb564ad0, 0x3c9fd6ca, 0x3c11103c, 0x3c781618, 0x3d01b7cd, 0x3c9a5332,
0x3c81b95c, 0xbca25ecb, 0xbcd0462c, 0x3ba95b00, 0x3b100a60, 0xbc167680,
0xbc71a5b4, 0xbd05eace, 0xbc55eff8, 0x3bc1d7c8, 0xbcf8a26c, 0x3adf2040,
0x3c973cc2, 0xbc299eac, 0x3cd58596, 0xbc4936b0, 0xbbaf8210, 0xbcded001,
0xbd04c3fd, 0xbc5055f8, 0xbbde7dd8, 0xbcbcff9a, 0xbcd688a7, 0x3bc759e8,
0xbc715d1a, 0xbcdfe1c9, 0x3c59054c, 0x3d00e963, 0x3c310d6c, 0xbd00c913,
0x3ba053b0, 0xbc9ea8d2, 0x3cb64da4, 0xbc4c1f3c, 0x3c99e57e, 0xbce09e7c,
0x3c221940, 0xbcc2b3f8, 0x3c2f0d0c, 0xbc05d274, 0xbb07ae90, 0xb9f01500,
0xbbf8c744, 0xbc79be5a, 0xbcd1f01c, 0xbccee11a, 0xbcd6c525, 0xbcd31621,
0x3b5daec0, 0x3ad21080, 0x3b0bd470, 0xbbcdfb60, 0xbcfa1036, 0x3af4e9c0,
0x3c58bc58, 0x3cae1f16, 0xbb8ac8d4, 0x3cd874fc, 0xbcef4496, 0x3d077d85,
0x3c53131c, 0x3bb24160, 0x3c8f6ba6, 0x3c8789e2, 0x3c30b3a4, 0xbc92e285,
0xbcc14d50, 0x3c8ae082, 0x3bee3e48, 0x3b6be940, 0xbd0663dd, 0xbc26f4e2,
0x3b98eb48, 0xbcfdd825, 0x3c1208d4, 0x3cb6d790, 0xbbc5863c, 0x3cc3db4a,
0xbcc25c52, 0xbc8a5f9a, 0x3c512028, 0xbc88a30f, 0x3affbc80, 0xbcd9dae1,
0xbcea2bd4, 0xbd041e89, 0xbc510d9e, 0xbc331eb4, 0xbc83865e, 0xbcbaacc3,
0xbbe884a4, 0x3c7a9b78, 0xbc0aab38, 0xbc71dfca, 0xbcd10df4, 0xbc8911f8,
0x3c5b59e0, 0xbb2fa448, 0xbbcf8d34, 0xbcb92fbc, 0xbccb8ca9, 0x3a8387a0,
0x3c64f1b8, 0x3ba10cc0, 0xbc8f796d, 0xbbd9941c, 0x3d023095, 0xbaa41f80,
0x3c718c68, 0xbbf14b08, 0xbbe2ad60, 0xb9fde580, 0xbcc5e63a, 0x3cd9545e,
0x3ccd066e, 0xba9188a0, 0x3cb0637c, 0xbccafae9, 0x3ca6512c, 0xbcaf97b2,
0xbc9e2b67, 0xbbbe9c00, 0x3cefd74e, 0x3c44fad8, 0xbcbfd7e3, 0xbcf8ffb8,
0xbc0d3f80, 0x3c944bbc, 0x3d0284d5, 0x3cf31682, 0xbcf6e33a, 0x3becadb8,
0xbcdb1f76, 0xbca29585, 0x3cf70a36, 0xbce64ce1, 0x3cbf21f6, 0xbb86accc,
0xbb82d144, 0xbb86e2a4, 0x3cb144ee, 0x3c41b0ac, 0x3c29d500, 0xbd02ccfd,
0x3c295bdc, 0xbbd50ff8, 0xbc20074a, 0x3c1185ec, 0xbcb8bd8c, 0x3cc662ce,
0xbc02e3da, 0x3ba90f28, 0xbc7e4d5a, 0x3c7bb3e4, 0x3c7578c0, 0xbc8638f4,
0x3c8932e6, 0x3bcb98b0, 0x3cffe982, 0x3bd15de8, 0x3c11511c, 0xbb74d2e0,
0x3c397ec8, 0x3c5a8b5c, 0x3b4b3f10, 0x3c925614, 0x3bb4fdf0, 0x3b437ee0,
0x3bdca5c0, 0xbced3e05, 0x3cf667ba, 0xbc490970, 0x3c25bd30, 0x3ae84600,
0x3c3daa0c, 0x3cb48bba, 0x3c92b91c, 0x3ce8accc, 0x3c6806f4, 0x3c474b98,
0xbcbda465, 0xbc282952, 0x3ba98f00, 0xbd015790, 0xbb75dac0, 0xbc8958a0,
0x3ce9ecf8, 0xbc7ac2f8, 0xbc35c722, 0xbcd5724c, 0xbb970b70, 0xbcfac23f,
0xbc59223c, 0x3d064b6b, 0x3cd9edf0, 0xbc996b56, 0xbc87ae27, 0xbccc0b52,
0xbb9f8ef8, 0xbca4b58e, 0xbbe28f60, 0x3be20178, 0x3bd416c8, 0xbca49a65,
0x3b57e510, 0xbc5c1822, 0x3cf8b3fe, 0x3c725b78, 0x3ca8bad8, 0x3becbf40,
0xbc896c76, 0x3bcbe6c8, 0x3c882fb0, 0xbd06f482, 0xbc89658b, 0xbcf8d2b4,
0xbc6a5ef0, 0x3bf410c0, 0xbc91a6fe, 0xbcfaa2e5, 0x3cfa6bb2, 0xbcbf90fc,
0x3ce321b4, 0x3c8cd4e2, 0x3cb4ed6e, 0x3d07058f, 0xbb913f68, 0xbc30e016,
0x3cfba0be, 0x3cafb562, 0xbc7afca8, 0x3c760d78, 0x3cffd0b6, 0xbbfa40a4,
0xbb652320, 0xbca00bb8, 0x3cb2cf52, 0xbc2cb5e6, 0xbb24c730, 0x3bbf30f0,
0x3ca9129e, 0xbc110734, 0xbc95aa5c, 0x3b5d6200, 0xbb779658, 0x3c21a1e0,
0xbba74de8, 0xbc8114cd, 0xbc7a81f4, 0x3afc0960, 0xbbbe3368, 0x3cb80172,
0x3c5be36c, 0x3ccbf4f6, 0x3cfe4eea, 0x3d03d2c3, 0x3d010235, 0x3bdc42d8,
0x3cb27b2c, 0x3c97042c, 0x3bd8df90, 0xbc1548fc, 0x3cbc4482, 0x3c3557f8,
0xbc93bfc2, 0x3cdb6014, 0xbc4df98a, 0x3cc0033c, 0x3ba6f1e0, 0xbcdd3bc9,
0xbc0b12ce, 0x3c9b0eb2, 0xbba77be0, 0x3caffcbe, 0x3c5cad38, 0x3ccb8db2,
0xbc431178, 0x3a33fa40, 0xbc8160d6, 0x3bbcf268, 0xbc4bef80, 0x3d07b6b9,
0x3a0c3300, 0xbbeb6a80, 0xbcaf4190, 0xbcd16ca7, 0xbc6da080, 0x3ceb5bee,
0xbb9f38cc, 0xbc00d192, 0x3b785300, 0x3c06b97c, 0x3b93c240, 0x3c8bd8b4,
0x3cfa14fe, 0x3c568320, 0x3ca499da, 0x3c2a230c, 0x3c19eff8, 0x3c2482e0,
0xbb3d2220, 0x3c5f90a8, 0xbc56b300, 0x3c15a410, 0x3cbe3234, 0xbaed7980,
0x3ce88b92, 0xbca9491e, 0xbcee77f2, 0x3ca6fbfa, 0x3c29c8ac, 0xbbea649c,
0x3bb58118, 0x3cdb2380, 0xbc98d760, 0x3c42f7a4, 0x3d0277ed, 0xbc16893c,
0x3cfe39f2, 0xbcf99b8c, 0x3d030f63, 0x3cee1518, 0xbcf60b83, 0xbc48bcd6,
0xbd0517d7, 0xbcee31c5, 0x3c5551e4, 0xbcb993e0, 0x3c0a2330, 0xbca4f350,
0x3c697358, 0x3c3a4504, 0xbcad56dc, 0xbcdab9a9, 0xbca75e82, 0x3d06e983,
0x3c5a71a8, 0x3c9e62de, 0x3c1dd1e0, 0x3bf23190, 0xbc8f3a9a, 0x3cf537c2,
0x3c242040, 0x3a120040, 0xbbdd1ecc, 0xbca04b92, 0x3be20df8, 0xbc84a4b6,
0xbb8f3ec4, 0x3c83d1da, 0x3bb453c8, 0xbc24b8fc, 0x3cc4ba92, 0xbcb09eae,
0xbc2fc592, 0xbce635b0, 0xbcc36103, 0x3c2033fc, 0xbc80c620, 0xbc5c8a62,
0xbd00982d, 0xbc367e52, 0x3c94e682, 0xbb18a588, 0xbcf5cf30, 0xbcf262c5,
0x3cdd5952, 0xbc64d3e8, 0xbca6c907, 0x3cf0221e, 0xbcab3122, 0xbcc67483,
0x3cdbee8a, 0x3ab36c60, 0xbd055b2b, 0xbd0171ad, 0x3cddc1d4, 0x3c78b6cc,
0x3cb1c924, 0x3ce5a9b2, 0x3cf4562e, 0xbc7189ce, 0x3cc552be, 0xbc01984e,
0xbbcc9558, 0x3c82857a, 0x3ca8b45a, 0xbc0d5038, 0x3c8a2784, 0xbcd209ac,
0x3cbfac22, 0xbc903992, 0x3ce3dd7e, 0x3c4ade90, 0xbb8d8388, 0xbc0bf322,
0xbce40b12, 0xbc08d2c4, 0xbbcf3f10, 0xbc9dbcd8, 0xbb757868, 0xbc133c56,
0xbc3b3884, 0xbbc0ea44, 0xbbcc339c, 0x3bf3e970, 0x3c612c38, 0x3c9aea24,
0xbca0e329, 0xbc8f8ce5, 0x3b391e20, 0x3c0743d4, 0xbc7100b4, 0x3c64efc0,
0x3cbd34f2, 0xbc091d9e, 0x3b8f85d8, 0xbce1e112, 0xbcf83f38, 0x3c844576,
0xbb518158, 0x399b3200, 0x3ca199f0, 0x3cf71c4a, 0xbbd42b10, 0x3cbde68e,
0x3b3decf0, 0xbce11d87, 0xbb0cc078, 0xbce45c32, 0x3c43f468, 0xbb547598,
0xbbaadf00, 0x3cab0e3c, 0x3cce26f0, 0xbbb28100, 0xbaf39ec0, 0xbccbc369,
0xbae36720, 0xbc43dd6c, 0xbc3d8eb0, 0x3cf4d142, 0xbcbafd72, 0x3c32f6e0,
0xbb84964c, 0x3bbd58f0, 0xbb878990, 0xbce332e1, 0x3ca67338, 0x3cdf8a1a,
0xbcce5a47, 0x3ace26e0, 0xbb9b8b34, 0xbc85f0f8, 0xbcc4c730, 0xbca5c9d6,
0xbc952af6, 0x3c0ab3dc, 0xbb0f0878, 0x3cbd885c, 0xbb859890, 0x3cedb1ba,
0xbab99e80, 0xbc58f14a, 0x3bb51110, 0x3ce09932, 0xbcb5d9ac, 0xbc73df40,
0x3cd64410, 0x3cc52fc0, 0x3b025c20, 0x3bf84140, 0xbcd92003, 0xbc7f8a78,
0xbd04a714, 0xb8ed9800, 0xbc6b9700, 0x3c4903fc, 0xbcfe6c32, 0xba821420,
0x3c224138, 0xbcde6e72, 0x3bfcb690, 0x3c179a88, 0xbc94245a, 0xbcd2b4f8,
0xbc17d444, 0x3cdb2d12, 0xbd031c6b, 0xbcff77e7, 0xbbc303e8, 0xbcaef625,
0xbbfc5058, 0x3d077697, 0xbcc22e94, 0x3c5585e8, 0xb732e000, 0x3c51abb4,
0xbc60da30, 0xbcccd774, 0xbc90ca76, 0xbb8a7e70, 0x3cf46ba2, 0xbc615830,
0xbc1be238, 0x39ac7580, 0x3c451fa8, 0xbcdd102e, 0xbcddcf76, 0x3ceb918a,
0xbcedd2fc, 0xbba2954c, 0x3ce29b2a, 0x3cf9262e, 0xbcd8746e, 0x3c3c5b8c,
0x3d00f4b5, 0xbc7566ec, 0x3cf1e40a, 0x3cb4efca, 0xbd03898a, 0xbcbb4f43,
0xbc3fee78, 0x3cfefe7e, 0x3b98c750, 0xbd00a394, 0x3bead0d0, 0x3b8a2f80,
0xbca7883e, 0x3d03f5c5, 0xbcb2ada9, 0x3be4b8d8, 0x3cdd4c7e, 0xbc8a4bde,
0x3c054e94, 0x3ca5bba6, 0x3c0da66c, 0x3cb9a6da, 0xbc93ca69, 0x3ca89d66,
0xbbed9090, 0xbd02fca0, 0xbc7b4962, 0x3c16b6f4, 0xbc105e62, 0x3c88fb06,
0x3cee3aec, 0xbcda8176, 0x3b18c550, 0x3c6f9088, 0xbc7a4678, 0xbcdf5f03,
0x3d048979, 0x39974780, 0x3c1bafc4, 0x3c32731c, 0x3cdb015a, 0xbd072fcb,
0xbb909b70, 0x3caafcee, 0xbca9a702, 0x3ce50694, 0xbcb946ae, 0x3c5a4d8c,
0x3c776370, 0x3cb47aee, 0x3b144e30, 0xba2ecbc0, 0x3cdd2cfa, 0x3c0c5e8c,
0x3b90be48, 0x3ce13622, 0x3b60c960, 0xb9ad5200, 0xbccc3952, 0x3c839c24,
0x3cd6cd3a, 0x3c8a5350, 0xbc90c652, 0x3c273a8c, 0xbc496e04, 0x3ccec760,
0xbc3d8130, 0x3c9a5790, 0xbccf7927, 0x3c9850bc, 0xbc8c5036, 0xbc1998b0,
0xbccc6cb4, 0xbcabe992, 0xbc6b5956, 0x3cfa5006, 0x3c63f6ec, 0xbc03a9c0,
0x3c1c9514, 0xbc65e956, 0x3c811300, 0xbb8cdd08, 0x3cab34e2, 0x3ccf0678,
0x3c60903c, 0xbce9cab0, 0x3cff91ee, 0x3b0a9290, 0x3c909fe2, 0x3c01e3ac,
0xbcf09eb0, 0xbcc7fdd8, 0x3b25ac50, 0x3a787440, 0x3cf41662, 0x3c102f14,
0x3a304a40, 0x3b4fb7b0, 0xbd025e8e, 0x3cd64278, 0xbc0cf13c, 0x3bf58f40,
0xbd072855, 0x3cecd648, 0x3bbb0928, 0xbc2ce778, 0xbc309b12, 0x3b8c99f8,
0xbba8f370, 0x3cf43a86, 0x3bd94668, 0xbcaa5e18, 0xbbb6161c, 0x3b95dbf0,
0x3c6dbcc4, 0x3c8d5b24, 0x3c7c49f4, 0xbccf4cd0, 0x3bf82f70, 0xbce838f2,
0xbcc86ddc, 0xbcbbf870, 0xbc9286d8, 0x3cc315b6, 0xbc0d1ab4, 0x3d041bc1,
0xbcc2ced0, 0x3c898e6c, 0xbb01c8b0, 0x3c78fca4, 0x3c88b30a, 0xbb1d6500,
0xbcb29607, 0xbc55b7f4, 0x3b6541a0, 0x3c3adaa4, 0xbc4b6e0a, 0xba310b00,
0x3cad7e60, 0x3cd3bbea, 0xbd0710f3, 0x3bf99a28, 0xbcee6783, 0xbcd1fbc3,
0xbcff8c32, 0x3b2de760, 0x3c85e2bc, 0x3c225a0c, 0x3c7089ec, 0x3cfadfee,
0xbb39f700, 0xba9c4c40, 0x3cc2587a, 0x3ad98420, 0xbc891b3a, 0x3c929484,
0x3c5b714c, 0x3cb77140, 0x3c81fb3e, 0x3c1ddddc, 0xbc272b74, 0xbc8004d4,
0xbcf83f81, 0xbca40800, 0x3cee3c16, 0x3cdcf55c, 0xbc9bd5eb, 0x3b31c050,
0x3cc192ba, 0xbad2f7e0, 0xbc8bcad2, 0xbcabec49, 0x3c3e5cbc, 0x3ca47d1e,
0xbcb624b8, 0x3cacd350, 0x3c86b740, 0x3bcacca8, 0x3ce795fe, 0x3c865f94,
0x3c335cc0, 0xbc48b09e, 0x3c8c6676, 0x3cf56e2e, 0x3cfad54a, 0xbbd5094c,
0xbcb5f870, 0x3be0d910, 0xbc2003d6, 0xbcf35f85, 0x3c974afc, 0xbc11e692,
0x3cb93bf0, 0x3c8e9842, 0xbc777a22, 0x3ce3b1fe, 0xbc9649ae, 0xbcfd9416,
0x3cfb7b7e, 0x3c6dd6c4, 0x3ca5f416, 0xbc6846b4, 0xbc3d9430, 0x3c3a61c4,
0x3b080ee0, 0x3c854c2c, 0x3c86e442, 0x3ce59e08, 0xbcbf9792, 0xbcb9fb6e,
0x3c6674e8, 0xbca52d89, 0x3bef2858, 0x3c5ca048, 0x3c9328ea, 0x3c8429cc,
0xb9b56600, 0x3d017dbb, 0x3b80d380, 0x3cd5f764, 0xbcc2c9dc, 0xbb00f2c0,
0x3cbaf692, 0xbc4dc70a, 0xbc816274, 0xbbcffbac, 0x3bce7118, 0x3bc7da10,
0xbc9f8512, 0xbc97a6de, 0xbc1469da, 0x3c9269aa, 0x3cb89544, 0x3c9b692e,
0xbcb2dc30, 0x3cc03fda, 0x3ced6b48, 0x3c4be2cc, 0x3cd1fb1c, 0x3ca287bc,
0xbb83eef8, 0xbceaba38, 0x3cc1bcec, 0x3cc20e14, 0xbc09d316, 0x3c77650c,
0xbc87d1eb, 0xbc1040b4, 0xbc9dfc90, 0xbc46351a, 0xbcd19b52, 0xbd03ecdd,
0x3c53e170, 0xbcc6cfde, 0x3cdf14ee, 0xb942ba00, 0xbba498bc, 0xbaf51ac0,
0x3caf214e, 0x3c91eaca, 0x3c4d1248, 0xbb66c120, 0xbcffc134, 0x3cd3cccc,
0xbcc0de12, 0xbc3f1452, 0x3cdf777c, 0xbc153456, 0x3a8a4280, 0x3b5c5210,
0x3d063d21, 0x3c9819dc, 0xbba7f3e8, 0xbc98f1c9, 0xbb7237e0, 0xbc296ad2,
0x3cf493a6, 0x3cea3096, 0xbcd7aaf0, 0x3cdc22fa, 0x3c37b5e0, 0x3bc3b6f8,
0xbc6cef70, 0xbcbac443, 0x3cf74e7e, 0x3cbb6b94, 0xbcb887a0, 0x3caf44ca,
0x3d0586ed, 0x3cf087be, 0xbcf1b8c3, 0x3c409c94, 0xbc2ea070, 0x3c838c5a,
0xbcfd5a52, 0xbc9a106e, 0x3cad01e2, 0xba5887c0, 0xbcb81a8c, 0x3a28d4c0,
0x3c93d4cc, 0xbbec2890, 0x3c14e20c, 0xbbb3e588, 0x3b901de0, 0x3cb4c838,
0x3ce3cdaa, 0xbca2c660, 0xbca49e52, 0xbb3b1a10, 0xbc89c3a9, 0x3cc9e23a,
0xba077a40, 0x3c8dee9c, 0xbb0f8200, 0xbcca77b6, 0x3d01b6a3, 0xbc93a207,
0x3cd24e4a, 0x3b28fb80, 0xbcddc2c9, 0x3c81739a, 0x3c7d9378, 0xbc1b41d6,
0x3d0482fd, 0x3cdfd30c, 0x3c1b70bc, 0x3c8c97f6, 0x3cfc96f6, 0x3ce5d134,
0xbc86b83e, 0xbbde6800, 0xbbbc5390, 0x3c067db4, 0x3b36df80, 0x3c39c950,
0xbc8e241e, 0x384f4800, 0xbc8cca16, 0xbcc9d752, 0x3cc3ac4e, 0x3c58509c,
0x3cd915d8, 0xbb54c4f0, 0x3bca4118, 0xbac029c0, 0x3cfe645e, 0xbcbcafc9,
0xbc647f3c, 0xbc13e252, 0xbceb0dd6, 0xbc7c4280, 0xbcf4ee38, 0x3c9385e6,
0x3cc63156, 0x3c065614, 0x3bf66ce8, 0xbb219cd0, 0xbc8922e7, 0x3c2350c4,
0x3cfd73ce, 0xbb4c2648, 0x3d06b483, 0xbcc5490c, 0xbced9a3a, 0xbc5290e8,
0xbcc0f476, 0x3b033bd0, 0xbc2d1796, 0xbc10aed6, 0xbcf2762e, 0xbc2d2d2c,
0x3ceb5f4a, 0xbd072fe4, 0xbcf5f3f0, 0x3bf4c740, 0x3a7e76c0, 0x3ce2cf78,
0xb9c43800, 0x3c69976c, 0x3c07f838, 0x3ca754d8, 0xbbfd4890, 0xbcea7156,
0x3cfa4c52, 0xbbf6dc70, 0x3bf03178, 0x3ca5242c, 0x3cd0dfb4, 0xbbd57290,
0xb9ded580, 0xbbb8849c, 0xbc57defc, 0x3b0e5140, 0x3cf08bee, 0x3b6479b0,
0x3c19fc50, 0xbca7996e, 0x3cb4989e, 0xbca6f685, 0x3b905770, 0x3c943ad8,
0x3cb9a884, 0xbd02fb7f, 0x3c324520, 0xbced2467, 0x3c32c130, 0xbc45e6fc,
0x39c59100, 0xbcc75434, 0x3c33ddfc, 0x3cdd3ac8, 0xbb7d1b98, 0xbcaab509,
0x3ca6efb2, 0xbc932129, 0x3bacdb10, 0x3cc83ef4, 0x3c49206c, 0xbc913570,
0xbca464ab, 0xba8fd3e0, 0x3c53e098, 0xbd01d96e, 0xbbc080ac, 0x3bd3ec18,
0xbcdc0fac, 0xbccac752, 0x3cd04de0, 0xbcef8683, 0xbcc73d98, 0x3cd46090,
0x3c49129c, 0xbc90860d, 0xbb620900, 0xbce16cd6, 0x3c73b068, 0x3cc1cca4,
0x3c9f8cb0, 0x3c0ae7bc, 0xbd007e70, 0xbc6bf64a, 0x3c10ce40, 0x3c806842,
0x3c6b8f9c, 0xbb149d30, 0xbc943165, 0xbba8c2f8, 0xbc6602ca, 0x3bf70880,
0xbca78598, 0x3bd60950, 0x3c91b7b6, 0xbc028270, 0x3b95c258, 0x3bd31868,
0xbcb8cf4e, 0xbadefb80, 0x3cc43e7a, 0xbc60cb92, 0x3b84f658, 0x3c9a6fdc,
0xbc5bbd00, 0xbc0e2aca, 0xbc035cfc, 0xbc90d0c9, 0xbcaac33a, 0x3b30caf0,
0x3c295ce8, 0x3cb869b0, 0xbbf15acc, 0x3cd54d8e, 0xbbe3bbf0, 0xbb5b04d0,
0xbbe1b92c, 0x3c000814, 0xbceca496, 0x3b779400, 0xbbc76cb4, 0xbd00f6b4,
0xbc8d933c, 0x3b868df0, 0x3cf0f66a, 0xbc82fc69, 0xbcf99e34, 0x3cb27ace,
0xbcd55074, 0xbc1ab3ac, 0xbc8a19fa, 0xbce33b2c, 0xbca12f94, 0xbb80169c,
0x3cc3a4e4, 0x3c97d420, 0xbcc23a58, 0x3cb51f92, 0x391c5e00, 0xbcd65816,
0xbc2689ac, 0xbc8ffebc, 0xbcba6b65, 0xbcbdf2c0, 0x3c0cc57c, 0xbc5d92e2,
0xbac82ae0, 0xbc237ea6, 0x3b7791a0, 0x3cff167a, 0x3cb9dac0, 0xba552f40,
0xbc9a7367, 0x3b8c9528, 0x3b4f7f80, 0xbc5021b8, 0x3cf19bd2, 0xbc270cac,
0xbc10dfec, 0x3c3118c4, 0x3cfe03a2, 0xbb152ae0, 0x3af8a1c0, 0x3bbb4c88,
0x3c6dc43c, 0x3ca2f458, 0x3d0859ad, 0x3bd38108, 0x3caa3ba2, 0xb92fc900,
0x3ccf9f42, 0xba820b80, 0xba6fab40, 0x3c547820, 0xbc88d685, 0x3c03badc,
0x3cb5d768, 0x3c9820ac, 0xbca33189, 0x3c81175a, 0xbabef0c0, 0xbc8005de,
0x3cb58ad4, 0xbc8e5374, 0xbcf907ce, 0x3c606cec, 0x3cccb070, 0xbccdf9b4,
0x3a8ed440, 0x3bf221a8, 0x3d00eb85, 0x3cfb2316, 0x3cd031c4, 0xbc126e78,
0xbccac8e5, 0xbc092570, 0xbc08f39e, 0x3c94d7d0, 0x3c8ed01c, 0xbb08c9c0,
0x3c783380, 0xbca348d6, 0xbcdce90e, 0x3c296fa4, 0x3c19b4d0, 0xba72c1c0,
0x3c463cbc, 0xbb4e4c30, 0xbc9581e0, 0xbbe894ac, 0x3cc0110e, 0x3c567dd8,
0xbd079242, 0x3d02a0b9, 0xbcda7594, 0x3cc4a896, 0x3cd7a102, 0xbc31d4e8,
0xbc91580b, 0x3cbc3630, 0xbcd1b3de, 0xbce0b232, 0xbcd8a7b4, 0xbbfe6b68,
0xbc9a3e3e, 0x3c7cd780, 0x3cab28e4, 0x3bf67e00, 0x3bf45a98, 0x3b34c7b0,
0x3cac600e, 0x3cb8c0ee, 0xbc10e84a, 0x3c93942a, 0xbc835c69, 0xbc874abe,
0x3cfe586a, 0x3c55092c, 0x3c998860, 0xbcb7bb92, 0x3a398e40, 0xbbfb08b4,
0x3cc4bdfa, 0xbd0179c8, 0xbbbb879c, 0x3d04662f, 0xbc81349a, 0xbcadd850,
0x3c3bfcc4, 0x3cf4e94e, 0x3c92d9e4, 0x3cdae0e0, 0x3ccc4c10, 0x3cc2d3c2,
0x3bc41e98, 0xbc316b74, 0x3c9f2436, 0x3cb208f4, 0xbcc2c210, 0x39d52080,
0xbb1b35a8, 0xbc74493c, 0xbc94a0be, 0xbcac5010, 0xbc6cf9ec, 0xbd016825,
0xbcabafee, 0x3c6b4080, 0xbcae91d0, 0xbc87876b, 0xbb0e5278, 0xbc1e43d6,
0xbc89471e, 0x3a792040, 0x3cd39710, 0xbca856b4, 0x3ceb18ec, 0x3c2df9c0,
0xbce2eb89, 0x3cac9d54, 0x3c529e78, 0xbc4499a8, 0xbc73cca2, 0xbc3e9200,
0xbcb196e2, 0xbb5a73f0, 0x3c10d424, 0x3c4347e0, 0x3d052a21, 0xbbc07a10,
0xbccd0469, 0xbbbda124, 0x3c06a188, 0xbc9fca45, 0x3cbce2be, 0xbcba9fa0,
0x3d069fff, 0xbcdd768e, 0xbc2aabbc, 0x3d03628d, 0xbc821212, 0x3cd850ac,
0xb9a97d00, 0x3c9b54e4, 0x3cffa576, 0xbb306020, 0xbc97c870, 0xbc092178,
0xbc9a433c, 0x38fbba00, 0x3b6db9b0, 0x3cae3c4e, 0x3c868404, 0x3c77936c,
0xbcf4ad76, 0xbc9519d2, 0xbce868a7, 0x3d0258ab, 0x3caf0760, 0xbcbb6727,
0xbcefee87, 0x3bd6b558, 0x3ba51868, 0x3aa09dc0, 0x3cace854, 0x3bf1af00,
0x3c9d4d96, 0xb9e16600, 0x3aa309e0, 0x3bb5d0f0, 0x3ba5c0e8, 0xbc2706f4,
0xbc3c2d68, 0x3ca0c67c, 0xbcbc3712, 0x3cd7e892, 0x3c9a4ca2, 0xbbd3c9e0,
0x3ba71b10, 0x3cf872c6, 0x3cf8262a, 0xbc5d9af4, 0xbceeb1d0, 0xbd0851d6,
0xbcfebe38, 0xbcc19e69, 0x3c8e3404, 0xbce02aff, 0x3cf8cc2e, 0x3b927af0,
0xbbb2cec4, 0xbcd1db7c, 0xbc1d6752, 0xbb66ca20, 0x3c3d24b8, 0xbc901a78,
0x3d015185, 0xbba40fd4, 0xbc4b5168, 0xbc1cf6fc, 0x3c197c84, 0x3c536ea0,
0xbcd76f8c, 0x3b0af110, 0xbc8eefb6, 0xbb7e09c0, 0x3ce6a054, 0x3ce6187e,
0x3cb4d80c, 0x3bdaeb68, 0xbcd227d6, 0xbcb259a9, 0x3ad60f60, 0xbd0343aa,
0x3ce7f220, 0x3bd95478, 0x3ac64a40, 0x3b2accf0, 0xbca5b6f4, 0x3ca91db2,
0x3b36e9a0, 0x3acf0de0, 0x3cc7697c, 0x3cd4fe9c, 0x3cb96ec4, 0xbb7665e0,
0x3cfb295e, 0x3c8b574e, 0xbb197400, 0xbc0cd8d6, 0xbc2690da, 0xbc2709ec,
0x3c8f8f54, 0xbcdec5d4, 0x3c90eab8, 0x3cf5cd9a, 0x3c6a9ddc, 0x3cb00db0,
0x3c8b9f66, 0xbcd01c30, 0x3bfd2208, 0xbbc3d668, 0xbc8203b2, 0xbce0bf5c,
0x3cac1132, 0xbb484658, 0xbcbc193e, 0xbcadc67a, 0x3cb05378, 0x3b9e5580,
0x3c62ffac, 0x3c982f4e, 0xbb4ce1e0, 0xbc3c35a2, 0x3cab481a, 0x3b239210,
0xbbef0144, 0xbcd40950, 0xbc05a4b4, 0xbb3d0220, 0x3cc4a362, 0xbbc72970,
0xbcae4707, 0xbcb11310, 0x3cd1d7c2, 0xbceb50fa, 0x3ce35e8e, 0x3c96304a,
0x3c63585c, 0xbcd107c5, 0xbcc526cc, 0x3c93daae, 0xbcc40e29, 0xbc876763,
0xbb548cf0, 0xbad0e680, 0x3cda8d8a, 0xbcdb2967, 0xbc136a8e, 0xbcc601fc,
0x3c059438, 0x3c595b5c, 0xbc588d6c, 0x3cb2e0cc, 0xbcb73f9e, 0x3c11da08,
0xbcce542e, 0x3ca4aa78, 0x3bcda018, 0xbcf664e3, 0x3c658e78, 0x3c998bda,
0x3c2f8e3c, 0xbc975ec5, 0xbd013f3c, 0xbc564740, 0xbcb82ab0, 0x3d0812d3,
0xbcc6603e, 0x3c4f3978, 0x3cdf3bc8, 0x3ce7d418, 0xbc4d583c, 0x3ce67cb8,
0xbce87889, 0x3abff540, 0xbcf85d21, 0xbc7c30f4, 0x3c333be8, 0x3c6bec34,
0xb68b8000, 0x39771400, 0x3c1c3ad4, 0x3cf6e4ba, 0xbcbe77a0, 0x3ab0ede0,
0x3be31df0, 0xbd008ef5, 0x3cc83032, 0xbc6a01e2, 0xbd08206e, 0x3c0b457c,
0xbbd61be8, 0xbc7bf32c, 0xbcccbca5, 0xbb5d0058, 0x3b9ac520, 0x3b48d520,
0x3c5bd1d8, 0xbb9e8b08, 0x3cdefae8, 0xbce3210e, 0xbc76dede, 0x3cd2d6ae,
0x3c9298d0, 0xbc0b8a8e, 0x3c0d3ba4, 0xbce6c883, 0x3c9f80dc, 0xbcb7cdda,
0xbce736c9, 0xbcf4bc56, 0xbc6720fc, 0x3c9049d8, 0x3c16da64, 0x3c7d7d28,
0x3b99c380, 0xba0c1400, 0x3c8e58de, 0x3aea2600, 0x3ce129b6, 0x3c2671b4,
0x3ce6cb20, 0x3cf414e6, 0x3bc38258, 0xbb2cc788, 0xbcc67c90, 0xbc8b7a78,
0x3c2e65dc, 0x3c0352f0, 0xbd06d63c, 0x3c7a6644, 0xbc4022f0, 0x3cb5c6e8,
0xbc923efe, 0x3c479b9c, 0xbbb43554, 0xbb3e94a8, 0x3c714e5c, 0xbc82f3b6,
0xbcd2b745, 0xbcefa4cc, 0xbc95a210, 0x3d00ac87, 0xbd04a9fd, 0xbcb1b2a2,
0x3d01548d, 0xbc9ec1e7, 0xbcdbf316, 0x3c91ff46, 0xbc6642c6, 0xbb7ab610,
0xbbddad78, 0xbce87b5a, 0xbbd5d360, 0xbcc499f0, 0xbcf86821, 0xbc65160e,
0x3c4a65a0, 0x3c34d350, 0xbca790b0, 0x3cb698fe, 0x3b3b1bb0, 0x3cba0110,
0x3c894804, 0x3cc69c4c, 0x39df2600, 0xbc02c58e, 0xbbc667f8, 0xbc7c750a,
0xbb1f7f48, 0xb8d7cc00, 0xbc6b90fc, 0xbc965d74, 0x3cdd27e8, 0xbc22c1f0,
0xbb181c68, 0xbb0bcf88, 0xbc821b1e, 0x3bf115a0, 0xbcb2d9b2, 0xbc7ce8e2,
0x3c0d5f3c, 0x3ce6b0ee, 0x3c76580c, 0x3c87d3a8, 0x3cf60e7a, 0xbcc239b0,
0x3cb97dfc, 0xbc0c2fd6, 0xbc34453c, 0x3d01b7cf, 0xbcd54bf6, 0xbbf960f8,
0x3c7cbfa4, 0x3cd0a2ec, 0x3cde61da, 0x3c89fb82, 0xbb7a5178, 0x3cd6312a,
0xbc990d45, 0x3cfc53d6, 0xbc8246f4, 0xbcb5ef9e, 0xbb5396a8, 0xbd0331b4,
0x3c74ff60, 0x3c78771c, 0xbcb1e942, 0xbccdeb23, 0xbc663ec6, 0xbca7a238,
0x3c970758, 0x3bb9fbf0, 0x3d01a921, 0xba6682c0, 0xbc3b0368, 0xbd00ea8e,
0x3cf25042, 0x3cdd8a30, 0x3c5caf1c, 0x3d015f8f, 0x3ccdb6c8, 0xbc16039e,
0x3c686a00, 0x3b1c0530, 0xbb78ee88, 0xbc30d322, 0x3bff3ea0, 0xbba0f880,
0x3a8b4c80, 0xbce257ec, 0xbc801f3a, 0x3cb55980, 0xbc1f39ce, 0xbc988420,
0xbc211ae2, 0xbca88fc7, 0xbc9628a9, 0x3cb5fac0, 0xbc7acf74, 0xbc54d4a2,
0x3b6a6e10, 0xbc959eb8, 0x3ca89150, 0xbd017bae, 0x3cfe6956, 0xbcd49223,
0x3c60def8, 0x3b141eb0, 0x3d05f1a5, 0x3c25c610, 0xbcc62d89, 0xbc4fa5da,
0x3cca0248, 0xbb353698, 0x3cc27770, 0xbbf02760, 0xbc88fcf8, 0x3c4dbf28,
0xbce4795a, 0xbc7a6e9a, 0x3c833294, 0x3cc77a1c, 0x3ca92676, 0xbc7db18e,
0xbcca16be, 0xba0f4500, 0x3c830bdc, 0x3ca7ffb8, 0x3b89bea0, 0x3c22b0ac,
0xbcba6938, 0x3d04a6bd, 0x3d012b07, 0x3cd724fa, 0xbc57ec4a, 0x3c75f5fc,
0x3cc2c078, 0xbcb295f6, 0xbcfa7d8e, 0x3828f400, 0xbc845ce0, 0x3cb16880,
0x3b2645c0, 0xbc9efdb0, 0xbcefe3cc, 0xbcf5a90c, 0x3ce8afba, 0xbd01b1b8,
0xbc3dbaac, 0x3c96c3ac, 0x3b923e60, 0x3b9c4b90, 0xbc3ca5f4, 0x3b17cf90,
0xbc94a080, 0x3ca88be0, 0xbc95f7d0, 0x3cbc7a42, 0xbc697880, 0xbcefdff0,
0x3ca5127e, 0xbcb75db4, 0xbcc0d9c0, 0xbcbd1d63, 0x3a162300, 0xbc15aa44,
0xbb7e3dc0, 0x3c5476ec, 0x3bd01d88, 0xbb93cbe0, 0x3c4cbf28, 0xbb848660,
0xba55b1c0, 0xbd072216, 0x3b80cf00, 0xbc95fe14, 0x3cf997ca, 0xbd02b8ad,
0x3c68c3ac, 0xbc0e8ff0, 0x3ce3c41c, 0x3ca6954e, 0xbccb68b0, 0x3cd639fe,
0xbc9e0c2e, 0xbb0e12f0, 0xbc3295c4, 0x3bd48bf8, 0x3ce272b8, 0x3ceafbb6,
0xbcdbec25, 0x39c08980, 0xbcd13f58, 0x3c691224, 0x3cda4486, 0xbc8c1baf,
0x3c811d76, 0xbc941507, 0xbcee6978, 0xbcf777d2, 0x3ca78c30, 0xbc8fe2c0,
0xbcc1247c, 0x3b0c50e0, 0x3c962eac, 0x3b0983b0, 0xbd086f5d, 0xbbfabcb4,
0xbcbb954c, 0x3ccd797c, 0x3c3f6e40, 0xbaf64280, 0xbc4df5ce, 0xbc7ff25a,
0xbc9ceb1a, 0x3c54e5e4, 0xbc7d635e, 0xbc60e4b8, 0x3b8901f8, 0x3d05c733,
0xbcda7545, 0x3ce0f936, 0x3b853218, 0x3c3b9bc8, 0xbb400ba8, 0x3ca93066,
0x3bd8f650, 0x3c976b9c, 0x3c6e5b44, 0x3c9588b2, 0x3cd7e08e, 0x3c9c0d04,
0x3c4a9208, 0x3cdf445c, 0x3c9c475a, 0xbce6f71c, 0xbcf2c7b8, 0xbcadf7b6,
0xbbd02480, 0xbcebd9fc, 0x3cb37056, 0xbcc0157c, 0x3cfc1d0e, 0xbc766028,
0x3c170cf8, 0x3c33f4e8, 0x3ca2ede6, 0xbb96631c, 0xbcd56087, 0x3cdbe8be,
0x3cf235fe, 0x3c1e8564, 0x3c989d7a, 0xbca5dda9, 0xbcc5e9fc, 0xbcbbf0b6,
0x3cf0cd2a, 0x3b26e860, 0x3bc16ff0, 0xbc2b1f04, 0xbcbbfeb4, 0xbce34010,
0x39b52b00, 0x3d06c2e7, 0x3c4c9f10, 0x3cc9808a, 0xbca949e7, 0x3cd4d572,
0xbce2c890, 0x3d07fdb7, 0xbc885d69, 0x3cf9c136, 0x3cdae38a, 0x3cb90130,
0xbb280730, 0x3c984e78, 0xbbbea2c4, 0x3ad55a20, 0xbb2a5c48, 0xbcc7a4dc,
0xbab8ddc0, 0xbc23c838, 0xbbf56e10, 0xbcdfb5c9, 0xbd01f440, 0xbc8fe040,
0x3cb35202, 0xbcd6d65c, 0x3cafb4d4, 0x3bf83fc8, 0x3cc0e8c4, 0x3a9cfc40,
0xbbf5de2c, 0xbc86ba7e, 0x3c612e80, 0xbca7e7c2, 0x3c951086, 0x3a7f3100,
0xbc222ba2, 0x3a2b3f80, 0xbd02cb75, 0xbc761a38, 0xbccb73f8, 0xbb654d58,
0x3cf8f39a, 0xbc59d8da, 0x3bc73088, 0xbcbb77ee, 0xbcaac3c2, 0x3b2f3de0,
0x3c99f858, 0xbcaf3ef8, 0x3cf272f6, 0x3cc7bb00, 0x3c9583b6, 0x3d000193,
0x3c8a597a, 0x3d06ba1f, 0xbcbc1127, 0xbc716128, 0x3cff6a26, 0xbd00cef0,
0xbcdea778, 0x3c637cd8, 0x3cba48a8, 0x3b97e3d0, 0xbc3d0756, 0x3cfada2a,
0x3aa26620, 0xbc9ee5fe, 0xbcaea500, 0x3b922a98, 0x3b514990, 0x3cf00f8a,
0xbcc87fbe, 0x3cfb788e, 0x3b68ca30, 0x3cb700f2, 0xbb83b5f0, 0x3ccb2974,
0x3b1d7a20, 0xbc6a7830, 0xbb523e10, 0xbcd7564c, 0xbc88e6c7, 0xbcc10e87,
0x3ca9ddd8, 0xbc97a34e, 0xbc657cb4, 0xbcfd32ba, 0xbcfd053a, 0xbce81c36,
0xbca7be80, 0xbc823292, 0x3cb80ea4, 0x3d0515e7, 0xbc494468, 0xbc20589e,
0xbcc50a50, 0xbc31deec, 0xbcc4e52e, 0xbc9a18d6, 0x3cae06e2, 0x3bb56e50,
0x3ce89de6, 0xbce493ce, 0xbc842f60, 0xbac761a0, 0x3cd87ece, 0xbccc198e,
0xb9db1700, 0x3cca6094, 0xbcd88987, 0x3c4f96e0, 0xbbb33800, 0xbbd6e59c,
0xbbd72f80, 0x3c5aa5bc, 0x3d012807, 0x3d08738b, 0xbd02a798, 0x3abc77e0,
0x3c6706dc, 0x3ca6f97c, 0xbc3bee9a, 0xbce09fd0, 0xbd0728c8, 0xbcb1f654,
0xbc441f16, 0xbca4e430, 0x3c127260, 0xb952eb00, 0x3cb4170c, 0x3cd570a0,
0x3cc889de, 0xbb114810, 0xbcbf652c, 0x3cddec56, 0xbc8a2027, 0x3d001313,
0x3b4dca30, 0x3c8f457e, 0xbb2cec20, 0x3c87b5b0, 0x3c0328b0, 0xbbce5768,
0x3ca4e6de, 0x3b9c9618, 0x3cf724c2, 0xbc73db70, 0x3bf391c0, 0x3cd0ac62,
0x3c314338, 0x3ce7253e, 0xba952be0, 0xbca30987, 0xbcaf5902, 0xbcd52f45,
0x3c4b986c, 0x3cf28ac2, 0xbcb18c20, 0xbd07464f, 0xbbcd4a10, 0x3d020e4f,
0x3c3df3ec, 0x3ba9f5d0, 0xbb85d7cc, 0xbacfce60, 0xbacb3e00, 0x3cd1447a,
0xbccaadb0, 0xbc80c71a, 0x3c645bc4, 0x3a8aede0, 0x3cc63036, 0x3b51ed40,
0x3ce04b0e, 0xbce155b8, 0x3bca23f0, 0xbd025ade, 0x3b0541d0, 0x3b393de0,
0x3ccbff10, 0x3cc9b2f8, 0xbc9091c3, 0xbcdf9e94, 0x3c65f884, 0x3c03063c,
0x3c80e916, 0x3cd3157a, 0x3c864a42, 0xbcc95027, 0x3cadbf54, 0xbcb1d287,
0xbc1a0b16, 0x3ca820ac, 0x3c3da888, 0xbbb85800, 0xbc7e76ac, 0xbbf8c3b4,
0xbccb3947, 0x3bb0a468, 0x3b3264c0, 0x3b87fbe0, 0xbba42178, 0xbc813b4d,
0xbcdfeffa, 0xbbcbc300, 0xbcab38f2, 0xbca1e7e2, 0x39faf280, 0xbbd2fac4,
0x3c0dd660, 0xbd0247e8, 0x3a76b040, 0xbcf07389, 0xbba6cbac, 0xbc5724ac,
0x3ca1835c, 0xbb485120, 0xbd045c4d, 0xbd0862e3, 0xbc4c91ca, 0x3bf601b8,
0x3bbc7c88, 0x3c84c53c, 0xbcb74e2c, 0xbca33140, 0xbc3f1184, 0xbc4717c0,
0xbc8ca905, 0x3cab9400, 0xbc075a00, 0x3c8eb8fa, 0xbcd7e487, 0x3cdb8d6a,
0x3bbb5828, 0x3cc83b4e, 0x3c7895ec, 0x3b8d3d20, 0x3ce42ccc, 0x3c93a04a,
0x3ccb2ddc, 0x3ce04e76, 0xbcf3bb07, 0x3bf4fd68, 0xbcc75da7, 0x3cfc2612,
0xbcb04420, 0x3d008441, 0x3c7075c4, 0xbccd9065, 0xbcfe6869, 0x3c729d9c,
0x3c831510, 0xbc7908e8, 0x3c8712ae, 0xbcdaf881, 0x3c97d9b6, 0xbba84654,
0xbcbf7572, 0x3ccf1602, 0x3c9f1f9e, 0x3c0a65bc, 0xbcb7245e, 0xbc0d2f5a,
0xbcc024d6, 0xbc99f849, 0x3c780564, 0x3cc912d4, 0xbcd3976c, 0x3c0949e0,
0x3ba14aa8, 0x3c76a768, 0xbc1aea6c, 0xbcb66334, 0x3ca316ea, 0xbc345952,
0x3cdd4074, 0x3c909030, 0xbcd1e95e, 0xbcc349e7, 0x3c81feb8, 0xbcb90a25,
0x3bafcc58, 0x3c0026a4, 0xbccf477e, 0xbc951f02, 0xbcd82fc5, 0xbcaf1109,
0x3bbaac40, 0xbd00e2ee, 0xba1e7800, 0x3d0535b3, 0x3bd01230, 0xbc3e3cc4,
0xbbe0d370, 0x3bf913c8, 0xbccb8750, 0x3cc43eae, 0x3bf7e520, 0xbca03605,
0x3cbe253e, 0x3cf76296, 0xbc07f126, 0xbcc1a187, 0x3c7020bc, 0xbc33e4bc,
0x3c229120, 0x3c48fd20, 0xbacd2540, 0xbcb292ec, 0xbca7dad6, 0x3cc38596,
0x3cc37a28, 0xbc3a190e, 0x3d07ba63, 0xbcb063b2, 0xbc884e5a, 0xbbb4d6cc,
0x3bdec180, 0x3ce2c20a, 0x3ce07a62, 0x3bf6e028, 0x3b2c73b0, 0x3cda318e,
0xbce13456, 0xbc5d567c, 0xbd03a14a, 0xbc72245a, 0xbbfc90ac, 0x3cd8f36c,
0xbcc891bc, 0xbbb3cf08, 0x3ca0fff4, 0xbcff9263, 0xbbe7fb4c, 0x3a3f9540,
0xbce8f9f6, 0xbc238c26, 0x3c0ec5c4, 0xbcd4159a, 0xbca68c87, 0x3c74ce08,
0x3a03c5c0, 0x3cc5f898, 0x3c0a6bbc, 0x3c1941bc, 0x3d038b0d, 0xbc9c0280,
0x3d0721a3, 0xbc86edb6, 0xbcacd716, 0x3cb7f5ea, 0xbc9f354b, 0x3b8f3ba0,
0x3bcd97f0, 0x3bfc87d8, 0x3c58e8dc, 0xbb9e0460, 0xbcaa1569, 0xbcc06f03,
0xbcc3bf3c, 0x3c1660d4, 0x3d042d93, 0xbcf2df70, 0x3cd05848, 0xbb45ba58,
0x3cb61fca, 0x3c9f104a, 0xba885200, 0xbccb623e, 0xbce2d754, 0x3b6e8200,
0x3c549d00, 0xbd017c4f, 0x3ca2a29c, 0xbc2b662c, 0xbbb1daf0, 0xbc8a599a,
0x3d03e79d, 0xbd03922b, 0x3ce678ba, 0x3c956f06, 0xbba0f344, 0xbd035e33,
0xbd037712, 0xbbb19388, 0xbc4da662, 0x3ce7433e, 0xbcf0de23, 0x3c15c75c,
0xbc244f40, 0xbcef703f, 0x3ac81c80, 0xbc4753b4, 0x3cc77f4c, 0x3cd9fcb0,
0xbd0676fa, 0xba851b60, 0xbcb7cbc9, 0xbd073d96, 0xbc599d62, 0x3b7bf5f0,
0xbd0797b5, 0xbcbb0f58, 0xbb2bb658, 0xbbadcc3c, 0xbc9f687c, 0x3cd4c93e,
0x3c2d2970, 0x3a0a3ec0, 0x3c1a9600, 0x3c149b6c, 0x3c8abef4, 0x3d07cd7d,
0x3c10c2ec, 0x3cb67b3a, 0x3c8b5ff0, 0x3cb049a2, 0xbce69c5c, 0x3ce6ae48,
0x3c83f6dc, 0xbb8aa460, 0xbb244e10, 0x3cbff150, 0x3c6cdaf4, 0x3c7d2c20,
0x3cd80a72, 0xbc86b18b, 0x3ccfb030, 0x3bae4cb0, 0xbcd13558, 0x3ca1aeec,
0xbbb59068, 0x3cebdcb4, 0xbd056059, 0xbcbab0c9, 0x3993f900, 0x3ce784c6,
0x3cae515a, 0x3cc6f57c, 0x3cac1c6c, 0xbc2eca00, 0xbcc78d49, 0xbc59097c,
0x3c8ab526, 0xbc93e3f2, 0x3bad1360, 0xbbfc319c, 0xbc385e00, 0xbc0b8466,
0x3c90dfe6, 0x3badbe18, 0xbb06dd60, 0x3d02a7df, 0xbd03ddeb, 0x3d01ed87,
0xbc549d5a, 0xbc9d4a5a, 0xbce81f3f, 0xbc8bc345, 0x3c96b0fe, 0xbd06035d,
0xbd07efd5, 0x3cf98c2e, 0x3cfe8fee, 0xbc1cc90a, 0x39472b00, 0xbd01a0ac,
0x3cfba286, 0x3cd6f678, 0x3b9318d8, 0xbbea0480, 0x3ca226bc, 0xbca48882,
0x3ca03a2a, 0xbd00b593, 0x3cd8847e, 0xbb0d1548, 0xbcddff27, 0x3a8bca60,
0x3ce2085e, 0x3c8f949e, 0xbcc2c2b0, 0x3b83e398, 0x3c227ba0, 0xbcfe997d,
0xbd0300b7, 0x3c6f8788, 0x3bbe5cd8, 0x3c8f9312, 0x3c41b04c, 0xbcf08636,
0xbb6d7210, 0x3cfda6ca, 0xbc5ebc12, 0x3c41d5fc, 0xbc77e18a, 0xbc3ae016,
0xbcd7fdbe, 0x3b6a6880, 0xbc9efc12, 0x3c7d8080, 0xbb9f6b3c, 0xbad5be00,
0x3ac05de0, 0x3befe9d8, 0xbba7284c, 0xbc80608b, 0xbae089a0, 0xbc84d0c0,
0x3c724c9c, 0xbc1f42de, 0xbcc92d7a, 0xbc4ae99e, 0xbd006a18, 0xbc819cd6,
0x3cb06f54, 0x3cf0367e, 0xbcf8e2d4, 0xbca7fe22, 0xbccc1327, 0x3cc44204,
0x3bec0bb8, 0x3c15b4d0, 0xb9fb1180, 0x3c96912c, 0x3cb75af8, 0x3c860fa4,
0x3ce33c00, 0xba891ce0, 0xbcca8127, 0xbca4d3bc, 0xbccfd952, 0xbca2e57e,
0xbc8d7916, 0xbc875bef, 0xbc9deeb6, 0xbbb0d908, 0xbcc6b3a7, 0xbb1fbf68,
0x3ba764d8, 0xbcf7333d, 0xbd0448a2, 0xbcc365a3, 0x3bc116c8, 0x3ce96450,
0xbbf58fa4, 0x3c3775c4, 0xbc737f96, 0xbc292156, 0xbba41a1c, 0x3cba6c00,
0xbcdb4a47, 0x3a995be0, 0xbd04d654, 0xbc90512d, 0xbcf84b4e, 0xbbb9d44c,
0x3c4791d8, 0xbbeebc60, 0xbc9dd8fe, 0x3c0f4000, 0xbb16c4e0, 0xbaf28020,
0x3ca95434, 0x3bf73158, 0xbc4db56c, 0x3a133480, 0xbcb20d1e, 0x3c35d088,
0xbcd254b0, 0x3cf70ace, 0xbd01e9b0, 0xbc4d3f22, 0x3cf1b496, 0x3b49f3e0,
0xbc8f9f2f, 0xbc85c9d6, 0x3a1d5040, 0x3c00c560, 0xbc422a1a, 0xbc144cb0,
0xbc98753e, 0x3c9531d4, 0x3a89f820, 0xb9a60600, 0x3b193030, 0xbcc29280,
0x3cfd556e, 0xbcd1cbd4, 0x3abe7ac0, 0x3cd351e4, 0x3cc08c7a, 0xbba70f80,
0xbc0e0530, 0x3cfc223a, 0xbc67cae8, 0x3cdc2f3c, 0x3c40b67c, 0xbcff7a8e,
0x3bd87fd0, 0xbc249352, 0xbbdcc370, 0x3cc25198, 0x3c99b5d2, 0xbcf838ee,
0x3cbae4c4, 0x3c3384f4, 0xbd02b0bd, 0x3d01755f, 0x3c43808c, 0xbcb477b8,
0xbd02923b, 0x3d0692df, 0x3ca8f5b6, 0x3cb1187a, 0x3cb0dc94, 0xbcc47203,
0x3c46c2cc, 0x3d07d1af, 0xbc897874, 0xbcb1804e, 0xbd077393, 0x3bf2fca8,
0x3d0307a3, 0xbbed9be8, 0x3c5d87b0, 0x3c4761a4, 0x3c094538, 0xbbaa94e8,
0x3c902bce, 0xbcd29c54, 0x3cae989a, 0x3ca0159c, 0xbcc4634c, 0x3cbe1a62,
0xbb494998, 0xbb2d35e0, 0xbb4388e0, 0xbc2aa9e6, 0xbd05a907, 0xbbadf770,
0x3ccb3196, 0xbcbbcc92, 0xbd01dd44, 0xbccee7b2, 0x3b82bcd8, 0xbbd39008,
0xbcd1a99c, 0x3c066588, 0x3cdf6bc8, 0xbbc89024, 0xbccac8e7, 0x3d007d01,
0x3ce69722, 0x3ccfe0cc, 0x3cdc3dea, 0x3c905bc4, 0x3bb61460, 0x3cef284e,
0x3cc4bef0, 0x3b6700b0, 0xbab30ba0, 0xbc8c9f89, 0xbc18ffd6, 0xbc8698b4,
0x3cb0599c, 0x3c08f170, 0x3caac12e, 0xbc29cfb8, 0x3c86bf56, 0x3bc4a360,
0x3c10b9fc, 0xbcb47054, 0x3a95c360, 0xbceec009, 0x3b2eed90, 0x3cd8e19a,
0x3cf5d8c6, 0x3ccb810a, 0xbc126644, 0x3cd5ae9a, 0x3c5734b4, 0x3bf9fee8,
0x3cff98b2, 0xbc4414c0, 0xbcebdf85, 0xbcb8cbb8, 0x3bc4d5a8, 0x3c5be984,
0xbbed5458, 0xbba4781c, 0x3cdb8b42, 0x3c52eab0, 0x3c960820, 0xbb56f868,
0xbc3e8592, 0xbcaabb85, 0xba614f00, 0x3cfeb4aa, 0xbcd7f9ce, 0x3ce447c8,
0x3c325164, 0x3cdef2e0, 0x3ce12e80, 0x3c2b4f64, 0xbcb51d72, 0xbbfdd4e0,
0x3caec252, 0x3d084b45, 0xbca264cb, 0xbcc28620, 0xbb9d03e8, 0x3d03d5af,
0xbc21079a, 0xbc1f78c4, 0xbcbc17e7, 0xbce1c11c, 0xbcae75f6, 0xbc2b3580,
0xbce9c285, 0xbc9cf3da, 0xbbb54f80, 0x3c1a2ca8, 0xbcc8a238, 0xbb2fdbc0,
0xbcf3359a, 0x3bf58e78, 0xbcd241d4, 0xbbbdb780, 0xbbf903cc, 0x3ca1aede,
0x3cb51d94, 0xbcb93220, 0x3c9f3810, 0xbcaed450, 0xbb3c5198, 0x3cb1f008,
0xbcced540, 0xbc24ff84, 0xbced7136, 0xbbb02670, 0x3ccc6cf2, 0xbc0286f8,
0xbc88d8c0, 0x3c5fa5f4, 0x3ca98f76, 0x3cec80a8, 0x3ca7def4, 0xbca582ba,
0x3c7f792c, 0x3c241300, 0x3b3010a0, 0x3bd62058, 0xbc1337f8, 0x3a21b640,
0xbcc955b2, 0xbb7cc600, 0x3ca54724, 0x3c6f1a0c, 0x3c9fb978, 0x39f92d80,
0xbcc11f1e, 0x3a006d40, 0xbc8ce134, 0xba6e3f40, 0x3ba9b720, 0x3b1c5da0,
0x3caebcac, 0xbc8d3cf2, 0xbcacee5e, 0x3a8fc4e0, 0xb9df7f00, 0x3c82302e,
0xbbf8d124, 0x3c29ee5c, 0xbbd647cc, 0xbb89cdf0, 0x3b13dea0, 0xbc41da12,
0x3cdb47d4, 0x3ce61f08, 0x3ab371c0, 0x3b165e60, 0xbc46205a, 0x3cca411e,
0x3d070b51, 0xbc5c7cc0, 0x3c1c63a4, 0xba816c40, 0xbce42087, 0x3ca64dda,
0x3c54efdc, 0x3cc7dfea, 0x3cc16c96, 0xbbb68768, 0xbae18de0, 0xbcdadfc1,
0x3c2accb0, 0xbc6a1e7c, 0x3c2dbf74, 0xbcd2bfbc, 0xbb36b010, 0xbccf57e7,
0x3c8d0a92, 0x3d07900f, 0xba42a500, 0x3cc67a20, 0xbd00b7ea, 0x3ca53984,
0x3cce6394, 0x3cf28e46, 0x3c229e30, 0x3cec6b26, 0x3ca12fac, 0x3c1294cc,
0xbd052988, 0x3b87db80, 0xbc4ec378, 0x3cdb7142, 0xbcd7d0f2, 0xbd074a48,
0xbca2c434, 0x3ba4d990, 0xbcb2c86e, 0xbc98fe14, 0xbbad4670, 0x3c235d4c,
0x3ba07a88, 0xbb21fe78, 0xbc46f41a, 0xbc2096f0, 0x3d00192b, 0x3cd7bf32,
0xbcccf990, 0x3c8a79f2, 0x3ca5ccea, 0x3cee5d52, 0xbce28307, 0xbcbf73c5,
0x3c5ce758, 0x3c3ac870, 0xbcbeecc0, 0xbc0c4634, 0xbca1113a, 0xbc2634fc,
0x3c362d28, 0x382c8800, 0x3c76a94c, 0xbc130fb0, 0x3d05ba49, 0xbcbcc798,
};
// 7
uint32_t bias_vals[] = {
0xbc3266ec, 0xbbe43fa4, 0x3c6898b4, 0x3c210868,
0xbcec5130, 0xbb382ff0, 0xbcd3e31a,
};
// 4,1,1,7
uint32_t output_exp_vals[] = {
0x3f0ae6f4, 0x3e06d0c7, 0x3e8cb149, 0xbb4830b7, 0xbe872094, 0x3ea9f6e4,
0x3e83094a, 0x3f29eae5, 0x3e4738be, 0x3eaa75dd, 0x3d940a8e, 0xbe7e27dd,
0xbe0ac591, 0xbd849768, 0x3eb2a525, 0x3f244889, 0x3ec81eaa, 0xbd5edf10,
0xbe003ea4, 0xbd500669, 0x3e220519, 0x3f356aed, 0x3ee21c12, 0x3e9c80a2,
0xbe3e4da9, 0xbe3df2ea, 0x3e3846d2, 0x3dd1579c,
};
// 4,1,1,7
uint32_t output_relu_exp_vals[] = {
0x3f0ae6f4, 0x3e06d0c7, 0x3e8cb149, 0x0, 0x0, 0x3ea9f6e4,
0x3e83094a, 0x3f29eae5, 0x3e4738be, 0x3eaa75dd, 0x3d940a8e, 0x0,
0x0, 0x0, 0x3eb2a525, 0x3f244889, 0x3ec81eaa, 0x0,
0x0, 0x0, 0x3e220519, 0x3f356aed, 0x3ee21c12, 0x3e9c80a2,
0x0, 0x0, 0x3e3846d2, 0x3dd1579c,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, VALID_PADDING, NULL);
}
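/*
 * Note (sketch, not part of the original test data): the *_vals arrays in
 * these generated test cases hold IEEE-754 single-precision bit patterns
 * stored as uint32_t literals. Assuming the usual bit-for-bit
 * reinterpretation performed by the test harness, a hypothetical helper
 * such as the one below recovers the float values for inspection, e.g.
 * hex_to_float(0x3f0ae6f4) is roughly 0.5426f.
 */
#include <stdint.h> /* likely already included by the harness; duplicates are harmless */
#include <string.h>

/* Hypothetical helper, for illustration only. */
static inline float hex_to_float(uint32_t bits) {
  float value;
  memcpy(&value, &bits, sizeof value); /* reinterpret the bit pattern as float */
  return value;
}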
void test_same_padding_non_zero_strides_large() {
input_set *set = &large_input;
strides_input_set *strides = &large_non0_strides;
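/*
 * Assumption: the "// d1,d2,d3,d4" comments below record the dimensions of
 * the array that follows (their product matches the element count, e.g. the
 * earlier 4,1,1,7 output arrays hold 28 values), and the hex literals are
 * again FP32 bit patterns as described above.
 */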
// 4,15,10,6
uint32_t input_vals[] = {
0x3b227300, 0x3f239634, 0x3e391590, 0x3f09c42a, 0x3e166940, 0x3e069194,
0x3f5d35bc, 0x3f1db843, 0x3f50936c, 0x3e8b542a, 0x3f1a91df, 0x3e086f24,
0x3e493b6c, 0x3e70d520, 0x3da58e98, 0x3f47d07e, 0x3f4e3621, 0x3f672084,
0x3f08a2e8, 0x3f243d5c, 0x3ecd7c1e, 0x3e99be2a, 0x3eb2bb0a, 0x3ef2679e,
0x3f56d025, 0x3f2783aa, 0x3f2bf3f6, 0x3f71a850, 0x3eaf2578, 0x3e63f6dc,
0x3e8df942, 0x3f15d269, 0x3eb210a4, 0x3f700f08, 0x3f28078d, 0x3e2abb70,
0x3f155820, 0x3e59b2f8, 0x3e98d6d2, 0x3d1d8d70, 0x3f37490b, 0x3d292ac0,
0x3eb0129a, 0x3e294e38, 0x3f3e92bb, 0x3eb536ce, 0x3ee0aad6, 0x3edb1218,
0x3f259a84, 0x3d9b6888, 0x3d769400, 0x3f5bbbf0, 0x3e752aac, 0x3dee2610,
0x3f501963, 0x3e646fa4, 0x3eb97480, 0x3f61aed5, 0x3f045fc5, 0x3f6eec5f,
0x3ee9c662, 0x3f5da091, 0x3f008237, 0x3e22af00, 0x3eac1394, 0x3f16c6a8,
0x3e95e63a, 0x3f7cb28a, 0x3f6e5ab1, 0x3e2ff794, 0x3dda8690, 0x3eb6527e,
0x3f545081, 0x3ecc3588, 0x3ecdda7a, 0x3f5e48ef, 0x3e1dbd08, 0x3f388474,
0x3ec9797c, 0x3e20ddfc, 0x3e210f04, 0x3dbf1058, 0x3dfef5f0, 0x3eca4c76,
0x3ed27c6a, 0x3f1c72dd, 0x3d8caa50, 0x3f1732a3, 0x3e55c1dc, 0x3f7849e2,
0x3f78300a, 0x3cecc400, 0x3f2920de, 0x3f443ee2, 0x3e526a40, 0x3e2ac644,
0x3f3fa4c6, 0x3f14a766, 0x3ecf9714, 0x3f54295a, 0x3e6a3400, 0x3f3a3c48,
0x3f4d1a5a, 0x3f649eb8, 0x3f632da3, 0x3f248220, 0x3e4dd434, 0x3efa92c4,
0x3f0fc029, 0x3e5a078c, 0x3e76b808, 0x3f19a861, 0x3f774430, 0x3f77569e,
0x3de9afd8, 0x3d90e268, 0x3f1ac701, 0x3f57f86b, 0x3f66a396, 0x3ecec390,
0x3e88fe3c, 0x3df434e8, 0x3f31c5c9, 0x3e32ebf0, 0x3f75dba1, 0x3ee0ab1c,
0x3f16a274, 0x3ea1bf7c, 0x3ed0d7ec, 0x3e1ad758, 0x3f45ef00, 0x3f27c019,
0x3f7b46cc, 0x3f493cd3, 0x3f3c87aa, 0x3ef91326, 0x3efd7bd4, 0x3f16e60d,
0x3db0d428, 0x3da74760, 0x3ea228e0, 0x3f1ae3e8, 0x3ea12492, 0x3ce1b520,
0x3e2f785c, 0x3f129c0b, 0x3f04c774, 0x3e4c3798, 0x3eeab532, 0x3f0b3116,
0x3ef157ec, 0x3f66aea5, 0x3eaf7386, 0x3ea57c6a, 0x3e1b21a0, 0x3f310164,
0x3ea4b1c0, 0x3d53d630, 0x3f6469ee, 0x3ead75d0, 0x3f09cd39, 0x3e95e5c8,
0x3bd4df80, 0x3de69818, 0x3e5265ac, 0x3f2da038, 0x3f16ca3a, 0x3f1f2851,
0x3f3483a0, 0x3f6d0642, 0x3f4d0c24, 0x3db32fe8, 0x3d860e10, 0x3f330ec4,
0x3f2fde45, 0x3e82c0f4, 0x3f0f720d, 0x3f6daebd, 0x3f03ae0e, 0x3f0b8600,
0x3ebf31c4, 0x3ea4aa22, 0x3f1c78a3, 0x3c716e40, 0x3e4575d8, 0x3f57ae16,
0x3e6a4260, 0x3f5e50c1, 0x3ecf21ea, 0x3f075442, 0x3ec51e74, 0x3edb6b48,
0x3f2d9719, 0x3ec9526c, 0x3e70fc3c, 0x3afcb800, 0x3f2f4f9c, 0x3f34ab61,
0x3e989b0e, 0x3f1ad501, 0x3f7b4eb2, 0x3f3c5a20, 0x3f398ad5, 0x3f21b58b,
0x3f5339d1, 0x3eb99f66, 0x3f29336e, 0x3f198507, 0x3f1ff7c2, 0x3f0f7b7c,
0x3f7da3d8, 0x3f6d870b, 0x3f70cbc9, 0x3f2684fd, 0x3e96332c, 0x3e3078d0,
0x3e809b9c, 0x3f67939d, 0x3f387367, 0x3f067a8f, 0x3f3ab1fd, 0x3e35a1d8,
0x3e998322, 0x3f173882, 0x3f4547d2, 0x3f7ab26b, 0x3ec8f190, 0x3f63d684,
0x3df099a0, 0x3f7f5118, 0x3eec1784, 0x3f3eaf5f, 0x3f35a0ef, 0x3dcb6f90,
0x3e946d0a, 0x3f0f8eb7, 0x3dc1fd68, 0x3f518bf1, 0x3f5a11d0, 0x3f246bec,
0x3ef26256, 0x3e0f2670, 0x3f1b8e29, 0x3f129ede, 0x3ef82a46, 0x3eb39a8e,
0x3dab1d08, 0x3e8b823a, 0x3f1be004, 0x3cb9b580, 0x3e9293da, 0x3f30e6c2,
0x3f50a3ac, 0x3f1d72ac, 0x3e7b6bf0, 0x3e28a754, 0x3f36df9b, 0x3ddf8260,
0x3c81b4a0, 0x3f07fcc4, 0x3f69986c, 0x3e1de344, 0x3ec52ec4, 0x3c57bfc0,
0x3f0b3294, 0x3f63093d, 0x3f1b7dce, 0x3ed7e19a, 0x3e27a4e4, 0x3ddf4478,
0x3f478ae5, 0x3f3f771e, 0x3e5b9e3c, 0x3ec4f3c4, 0x3eead50e, 0x3f321718,
0x3f1b6d81, 0x3f276718, 0x3f40d46c, 0x3f17eb30, 0x3f56db9d, 0x3f35a402,
0x3e24e830, 0x3f404ba7, 0x3f6d17d4, 0x3ea907b6, 0x3f55b98a, 0x3f31ac61,
0x3f27cd25, 0x3e453574, 0x3efb8bb2, 0x3f2a78c8, 0x3c219740, 0x3e9733f6,
0x3e1b3dac, 0x3e52b850, 0x3ea19c54, 0x3f74eea4, 0x3ec0888e, 0x3ea375a2,
0x3e7d1340, 0x3efefce8, 0x3ef7b7a4, 0x3f15e1d4, 0x3e47e7c0, 0x3ed4e0e0,
0x3f4c78ae, 0x3e464d58, 0x3f3c8f56, 0x3db95320, 0x3f540c87, 0x3ee0a846,
0x3eee9cd4, 0x3f679b97, 0x3eeb1728, 0x3f2eda9a, 0x3f41cb41, 0x3e6a0438,
0x3ed619c8, 0x3f0de10b, 0x3eb85afc, 0x3f102337, 0x3f228999, 0x3f0ba4fb,
0x3f58c2e2, 0x3f475fb4, 0x3f0285eb, 0x3ea4b84a, 0x3f686f8d, 0x3c787280,
0x3f483440, 0x3e87dcba, 0x3e186dd4, 0x3dbbe590, 0x3f0a98cd, 0x3f4f4b31,
0x3f3c31a3, 0x3e7ec630, 0x3dbc4970, 0x3f0e6e6c, 0x3e6c6300, 0x3f533bb5,
0x3e5252c8, 0x3f7bbeee, 0x3e512cf0, 0x3e6eaa88, 0x3ed19e52, 0x3ef2dfd4,
0x3f66298e, 0x3e06c284, 0x3f7913ea, 0x3e086028, 0x3e983d58, 0x3eb12cde,
0x3f69b259, 0x3f1bd003, 0x3e378cc8, 0x3d8336f8, 0x3f39dc93, 0x3f1ec3fe,
0x3ebb3a58, 0x3f5da147, 0x3f6f35c2, 0x3f30f554, 0x3dbbca18, 0x3f2658b0,
0x3f6f702e, 0x3f48373a, 0x3ef3b78e, 0x3f09770f, 0x3edf1ef2, 0x3f73bce8,
0x3f166e85, 0x3f1bf0ab, 0x3f6d0fb0, 0x3f6dbb6b, 0x3e85114e, 0x3e36911c,
0x3a967a00, 0x3f71e565, 0x3ea6d724, 0x3ef5c85a, 0x3eb070ea, 0x3e3fb128,
0x3f0ec81c, 0x3eff7f18, 0x3e21e134, 0x3f119379, 0x3f726941, 0x3eaa67fa,
0x3cd05980, 0x3f646c18, 0x3f343541, 0x3efb9554, 0x3f6e7051, 0x3e5560f4,
0x3f2e20b0, 0x3f4584f1, 0x3f57bd33, 0x3e62445c, 0x3e0db0ec, 0x3e5e4a7c,
0x3f6e32e6, 0x3efabfb4, 0x3f68f574, 0x3e5d16f0, 0x3f1504f6, 0x3ea6319e,
0x3dfece38, 0x3ed875ea, 0x3f7bc660, 0x3ea7df5c, 0x3f00b6e2, 0x3f7bbfd8,
0x3f44f6d3, 0x3f06d40f, 0x3ead5386, 0x3f2ab30b, 0x3f78b86a, 0x3f1d2711,
0x3f78b429, 0x3f7b3075, 0x3f16c7f6, 0x3f4d8f95, 0x3f5c6505, 0x3e154bd8,
0x3dac8ea8, 0x3f29ce96, 0x3f4a8764, 0x3f65ee0a, 0x3ef75684, 0x3e4668ec,
0x3ea73150, 0x3e78878c, 0x3ec29160, 0x3eacb782, 0x3f6bcb4b, 0x3ecfc520,
0x3f6c98ff, 0x3cf2ac20, 0x3f04bbf0, 0x3dda9218, 0x3f17043f, 0x3dd99ac0,
0x3f2f7419, 0x3e151de8, 0x3f3d2a1d, 0x3e4904cc, 0x3f5a5aeb, 0x3e9df2ac,
0x3f2c2b40, 0x3dbd7280, 0x3f467843, 0x3d777790, 0x3f141bc5, 0x3f2acbac,
0x3ebcc4ac, 0x3f49ae6c, 0x3d903228, 0x3f7f1524, 0x3edee394, 0x3f10274a,
0x3f5fe92b, 0x3eb6e0fa, 0x3db0b4d8, 0x3f57f931, 0x3f7d17f4, 0x3f7568e6,
0x3f169c26, 0x3f47d4f7, 0x3f60c98a, 0x3f51a1e1, 0x3f406684, 0x3e214454,
0x3e65d3d4, 0x3f105cc0, 0x3eeb2868, 0x3f00677c, 0x3d840968, 0x3f0c7af9,
0x3e647864, 0x3dbddbb0, 0x3f0858ee, 0x3f65bdc4, 0x3f3c397f, 0x3f171337,
0x3cf62fe0, 0x3ea6e188, 0x3f445b06, 0x3f153ac8, 0x3ebffa40, 0x3ea58e80,
0x3e2098e0, 0x3f355606, 0x3f1bef0c, 0x3f083143, 0x3f46cd34, 0x3f6944c6,
0x3f122dac, 0x3ec87b64, 0x3eb02282, 0x3f3d5363, 0x3e8d3f60, 0x3f4594ef,
0x3f27eddc, 0x3f5e8d79, 0x3f510543, 0x3ec1869e, 0x3e01bb4c, 0x3f1e21f4,
0x3eb8f602, 0x3f46079e, 0x3f4412bf, 0x3eb54668, 0x3b115600, 0x3f46043a,
0x3c899840, 0x3f7c32fa, 0x3f297c08, 0x3e05d578, 0x3eb9b888, 0x3f62adb0,
0x3ebfeb6c, 0x3dc73698, 0x3f7b0564, 0x3f6ad0a8, 0x3f271995, 0x3daf2d98,
0x3f4f6c8a, 0x3f608210, 0x3f1e818b, 0x3f21c37f, 0x3f29bbe3, 0x3ec29db8,
0x3f048eed, 0x3e578144, 0x3f7711d1, 0x3e4f9d44, 0x3eca2398, 0x3e903ca0,
0x3ea10eda, 0x3f7ccd9d, 0x3f78c368, 0x3d899908, 0x3f7da29f, 0x3f07e4b3,
0x3f643cb6, 0x3efd82da, 0x3de20110, 0x3f236212, 0x3f1add60, 0x3ecf797a,
0x3f61c12b, 0x3f6b564c, 0x3e43140c, 0x3ec424fe, 0x3f665adb, 0x3d982838,
0x3f3ccd0d, 0x3e960a94, 0x3e845d42, 0x3f6fe543, 0x3e81547a, 0x3f30a7ee,
0x3e440f70, 0x3eee3322, 0x3eeead26, 0x3f36b3f9, 0x3f304acd, 0x3ed1598a,
0x3ec1edda, 0x3e5f47d4, 0x3efd6506, 0x3ec67c2a, 0x3df9b4f0, 0x3e9df502,
0x3f45e401, 0x3e589ad8, 0x3e1a93b4, 0x3f5c46b8, 0x3e621148, 0x3f7f5600,
0x3f72baff, 0x3f4123e2, 0x3f71a85d, 0x3ead1466, 0x3daa2a58, 0x3f3c9ae3,
0x3f4ca2fc, 0x3f60ab28, 0x3f65251a, 0x3eb71be8, 0x3f2a8be0, 0x3ef2cf8a,
0x3de3cfa8, 0x3f5eedd8, 0x3dbe16d8, 0x3e00f3b4, 0x3e9c556c, 0x3e8e6816,
0x3c8fe4a0, 0x3f47981b, 0x3f647967, 0x3f2302d6, 0x3f6db572, 0x3e756f1c,
0x3f237ae4, 0x3f260622, 0x3f37b5f3, 0x3ebaa3a0, 0x3c14c2c0, 0x3ef040e0,
0x3ecdd6cc, 0x3f09c5ba, 0x3eafffe0, 0x3ee2ad9c, 0x3e69c148, 0x3f45a742,
0x3eef0b64, 0x3e971e1c, 0x3e92f68a, 0x3e8c65b2, 0x3f1d2171, 0x3f465233,
0x3f7ce279, 0x3f728308, 0x3f2cfe96, 0x3f63220d, 0x3ee210e0, 0x3f21773f,
0x3f13a5e2, 0x3e33a0d0, 0x3cf33a80, 0x3f734583, 0x3f53a2dc, 0x3d047510,
0x3d723680, 0x3ef45e58, 0x3c239340, 0x3f2423d9, 0x3f0375eb, 0x3e9ca4c2,
0x3ee4a250, 0x3ed13724, 0x3f460cda, 0x3efccdf4, 0x3f62d212, 0x3f0709fc,
0x3e6e0788, 0x3ef183b6, 0x3f040b9f, 0x3f25cdd1, 0x3eecf9aa, 0x3f4d69af,
0x3f2474ef, 0x3e183368, 0x3f4bacf2, 0x3e38d730, 0x3f5dcdc3, 0x3ebbe596,
0x3edd6934, 0x3f03f7f2, 0x3e185098, 0x3eac703a, 0x3d1c79d0, 0x3dd411b8,
0x3f364ceb, 0x3f070ed7, 0x3ea6470a, 0x3d52aae0, 0x3f720e21, 0x3ea081d8,
0x3d94d6c8, 0x3f25e9c7, 0x3e288ac4, 0x3f01a8ad, 0x3eb0175a, 0x3ec99266,
0x3effa4b2, 0x3f3923ca, 0x3f1f7ee6, 0x3e068364, 0x3f284e78, 0x3d899a78,
0x3f3ada39, 0x3f70a7c2, 0x3e993112, 0x3ee1528a, 0x3f63bc2a, 0x3f0281a1,
0x3f6e80be, 0x3ed4482a, 0x3f3d10e7, 0x3f2f11ec, 0x3d82e6e8, 0x3f30d8dc,
0x3d59a860, 0x3f3a3acc, 0x3e18b574, 0x3f683a08, 0x3eb08d96, 0x3ebc9c68,
0x3e9cb71e, 0x3d976330, 0x3f5cf76c, 0x3edc775c, 0x3f21f7d3, 0x3f63706b,
0x3ecaac74, 0x3e8da316, 0x3e963970, 0x3ee921f2, 0x3e5ddfcc, 0x3f731a16,
0x3d343f70, 0x3dbb54b0, 0x3eab397a, 0x3f2fd8b0, 0x3f590838, 0x3ef754ea,
0x3db72170, 0x3f1223bc, 0x3f40aa88, 0x3e1133e8, 0x3f2e5bd2, 0x3eecf71c,
0x3dbaeaf0, 0x3f6dfea7, 0x3f26cbe0, 0x3dfef600, 0x3f4ea023, 0x3eb03008,
0x3f007f87, 0x3ec6fb9a, 0x3ef3a764, 0x3f788842, 0x3f1ff8ad, 0x3f49695f,
0x3ee9599a, 0x3f55dff9, 0x3f69a690, 0x3eaaa87a, 0x3c923cc0, 0x3f6d2d26,
0x3e9fddae, 0x3eaaa242, 0x3f077995, 0x3e22c624, 0x3e6e3abc, 0x3f5ce1ad,
0x3e48f3b0, 0x3d9ebf48, 0x3ecec5be, 0x3f5dd00b, 0x3ef86604, 0x3f679d61,
0x3cdb0360, 0x3df97ec0, 0x3f25ec6b, 0x3f363d3f, 0x3e596a0c, 0x3f7f2347,
0x3edae694, 0x3f6132f5, 0x3d800fc0, 0x3f3bc71f, 0x3e0ad0e8, 0x3f486f7d,
0x3df0ddd0, 0x3f5430fb, 0x3dfc07c0, 0x3f2bef96, 0x3e09ee0c, 0x3c31b900,
0x3f65a5a4, 0x3e5a7adc, 0x3dfc5d50, 0x3f688721, 0x3f4147d4, 0x3f150ac4,
0x3e480a64, 0x3f2369d7, 0x3e5f2f14, 0x3f6459bf, 0x3dcc8530, 0x3f2128f4,
0x3e59dfd8, 0x3f09ee57, 0x3ebc11e4, 0x3e696ebc, 0x3f7b5d10, 0x3f4f2966,
0x3e10c538, 0x3e419370, 0x3f612083, 0x3f7c7854, 0x3f2e0e5f, 0x3eb470f2,
0x3d9a3708, 0x3e98d21c, 0x3f56c03b, 0x3f0c82c9, 0x3d1488d0, 0x3d99d150,
0x3f7d2b78, 0x3f51c7dd, 0x3d4ac580, 0x3f452f56, 0x3c637bc0, 0x3f73d49d,
0x3f2a91a9, 0x3f43c4bd, 0x3d0fdde0, 0x3ea53ab6, 0x3de05858, 0x3f7a2eff,
0x3f3adcb7, 0x3e7da6ac, 0x3f6c400b, 0x3f2cf549, 0x3f410de1, 0x3f6c1df4,
0x3f13f3e7, 0x3ece914e, 0x3ef81cf2, 0x3f517093, 0x3df8d6e8, 0x3d0103b0,
0x3edab6cc, 0x3e07ff04, 0x3d7ceec0, 0x3f2c77d7, 0x3f52b252, 0x3e46be24,
0x3f3ccb1f, 0x3e2b7570, 0x3e00e6a4, 0x3e858172, 0x3e8b1e94, 0x3f423cdf,
0x3d7b2790, 0x3ec635ea, 0x3f086a23, 0x3f3babb7, 0x3eca00da, 0x3dd86da0,
0x3e6d554c, 0x3f22bdb0, 0x3f7e1cf7, 0x3e299cd0, 0x3eedd0b0, 0x3f154835,
0x3e9d9c14, 0x3f2b023e, 0x3eec0434, 0x3f48c4fc, 0x3f7f2970, 0x3f2bd569,
0x3e95596e, 0x3e7d79c8, 0x3f2e5e73, 0x3f15abd8, 0x3e889f28, 0x3ecc220c,
0x3f241599, 0x3f71b6f6, 0x3f2b4cb0, 0x3d0c1740, 0x3f5d7ec2, 0x3f54ce68,
0x3ea8ff4c, 0x3f60bdde, 0x3d0b0820, 0x3ee349b2, 0x3e8a28a2, 0x3f3c4767,
0x3f5b0ac8, 0x3f0af48f, 0x3ed0230c, 0x3e86b4e4, 0x3f2b0d4a, 0x3d98d4b0,
0x3f0698a6, 0x3dae6af8, 0x3d30d310, 0x3ded8630, 0x3f635c9c, 0x3e9c1ce4,
0x3cc29100, 0x3f5e699c, 0x3f774a6f, 0x3f2fc333, 0x3f7ca2dc, 0x3f6b4b10,
0x3ebff2b8, 0x3edce210, 0x3f15b40b, 0x3f414d23, 0x3f295ef3, 0x3d1255c0,
0x3e99ec1e, 0x3e67421c, 0x3f561b2b, 0x3e35e1c4, 0x3f396694, 0x3f0a8afc,
0x3ea9504c, 0x3f2ba5b9, 0x3f765bb2, 0x3d4c0170, 0x3edf6faa, 0x3f53c21f,
0x3eed2712, 0x3f240319, 0x3ea34d6e, 0x3f53e9d4, 0x3ed5939e, 0x3ec683c6,
0x3e97b538, 0x3da0cf30, 0x3f198721, 0x3bea2380, 0x3e9b28da, 0x3e9341c6,
0x3e4414d0, 0x3e86a172, 0x3f37d055, 0x3f4f6140, 0x3f6fa7aa, 0x3f7cf6be,
0x3f63e38d, 0x3f36b40f, 0x3f25c24a, 0x3f57dcee, 0x3ee8e10e, 0x3d951730,
0x3f266947, 0x3e6ef9e8, 0x3db91588, 0x3dd773d8, 0x3f0a5722, 0x3ec2aab2,
0x3f696037, 0x3e81cbcc, 0x3f71584f, 0x3f2225ce, 0x3ded1468, 0x3df3b248,
0x3f38aa36, 0x3f4c3d00, 0x3e5385c0, 0x3eaa1294, 0x3f67406f, 0x3e01e2b8,
0x3eab3856, 0x3e1b7a90, 0x3f29ea98, 0x3eb2ee0c, 0x3ea0b8de, 0x3f74e2a7,
0x3f37a4ba, 0x3e63dd0c, 0x3eba263e, 0x3f142153, 0x3f54cc97, 0x3e1833ec,
0x3ce4fae0, 0x3f4885f6, 0x3e0e6090, 0x3d37e430, 0x3d485e60, 0x3f5f9d3f,
0x3efa6ed6, 0x3f4c4b68, 0x3f27e414, 0x3e0d2370, 0x3e57096c, 0x3e329078,
0x3f6994b6, 0x3f2c8ea3, 0x3eb01968, 0x3f2c91df, 0x3f1b4723, 0x3f75441d,
0x3ed08cd4, 0x3e8e4594, 0x3f3bd23f, 0x3ec0c306, 0x3d793b10, 0x3da3fc98,
0x3ed9eee2, 0x3f2ca505, 0x3f17327d, 0x3ded1528, 0x3f7d8646, 0x3ebd4142,
0x3f60257a, 0x3f0b7bcc, 0x3e9b6968, 0x3ef868ca, 0x3f4fb17e, 0x3f66464e,
0x3f7a95d2, 0x3ccc43c0, 0x3f1317ba, 0x3f020e47, 0x3f772072, 0x3d677c70,
0x3de323d8, 0x3e8718e8, 0x3f6eea63, 0x3f30b4c8, 0x3f1b58d8, 0x3efc1cca,
0x3d02e760, 0x3f35ad69, 0x3f1d6006, 0x3f4be0f1, 0x3d00f4f0, 0x3f2fdd10,
0x3c8f0820, 0x3ec6a986, 0x3d8c8958, 0x3dd64960, 0x3f0c7fc4, 0x3f398ce9,
0x3f0aaddf, 0x3ea13cd6, 0x3e1a154c, 0x3ea00ee0, 0x3e3a2e18, 0x3ee4b0fc,
0x3eac5aca, 0x3ef1e564, 0x3f4b37cc, 0x3f7ae104, 0x3f53ab47, 0x3e6aadd8,
0x3f437532, 0x3f2c7fb9, 0x3e098bd0, 0x3dbaf628, 0x3dc9ce08, 0x3efd63f8,
0x3e8d38b4, 0x3f20934f, 0x3f238d1f, 0x3f76fb8b, 0x3ece9daa, 0x3f3c284b,
0x3f621b30, 0x3e93a29c, 0x3f2e7902, 0x3d809498, 0x3e047910, 0x3e9265b4,
0x3e5703a8, 0x3cf04620, 0x3f653718, 0x3f557720, 0x3e69fd90, 0x3f12701c,
0x3f0d5de4, 0x3ea9fa56, 0x3f60d22b, 0x3f0e7bf6, 0x3f1e16cc, 0x3e23c218,
0x3dbd6d60, 0x3d71d370, 0x3f25f6be, 0x3e99c3a0, 0x3e4554ac, 0x3f5e6597,
0x3eaf413a, 0x3e83ba80, 0x3f390641, 0x3ec20c52, 0x3f64022f, 0x3f1904d1,
0x3f7aeeeb, 0x3f5f26ed, 0x3efe093a, 0x3f2c3af0, 0x3d7c5340, 0x3e771b70,
0x3f3c9ba2, 0x3f3a0f93, 0x3f05971e, 0x3ef2ea5a, 0x3eb8253c, 0x3f7ecfbf,
0x3d086610, 0x3eb88c36, 0x3f55e59a, 0x3f37f6c6, 0x3d0f44b0, 0x3f44576e,
0x3e5041b4, 0x3f2796f7, 0x3e2d9614, 0x3f7879d6, 0x3f767896, 0x3df10540,
0x3f5a5f10, 0x3ed861b0, 0x3e8155c4, 0x3e009588, 0x3efb9706, 0x3f293d23,
0x3efc23ca, 0x3f0e3473, 0x3f1571b8, 0x3ec84a82, 0x3f490ff7, 0x3e3bf170,
0x3f16585d, 0x3f2f5cf0, 0x3e0ecf24, 0x3e21ebe8, 0x3eb877ba, 0x3f70ba33,
0x3f47bb80, 0x3cbc7740, 0x3f495d88, 0x3e83f83c, 0x3ea633d8, 0x3e0cf644,
0x3d8f7018, 0x3f41d8e8, 0x3f59a178, 0x3f1b532f, 0x3f46bc83, 0x3f73f2b2,
0x3e01f8cc, 0x3ee03a50, 0x3e55f9a8, 0x3edd34f8, 0x3f29d7b1, 0x3f4026b2,
0x3f37a4d2, 0x3f6ce2fd, 0x3e7684e4, 0x3f046475, 0x3f0a0a96, 0x3ed47f3a,
0x3f5bea8b, 0x3e743e34, 0x3e4ffb34, 0x3eac9ac2, 0x3d876210, 0x3f7c9b4c,
0x3e796594, 0x3e312c50, 0x3f4fc17e, 0x3ee8d182, 0x3ef8ac4a, 0x3e87a7fa,
0x3f0c75c5, 0x3e7cb45c, 0x3e823ed4, 0x3f3da9a1, 0x3e82d524, 0x3ef124aa,
0x3e53f980, 0x3f129f91, 0x3ec45ebe, 0x3f1c6cea, 0x3f6b277b, 0x3f21c8ee,
0x3ea44fd0, 0x3f49df77, 0x3e2ba6c0, 0x3f3a2841, 0x3f3b98b8, 0x3f4a1a05,
0x3f672f6c, 0x3f4f85ff, 0x3f216157, 0x3e55c7f4, 0x3f14fbd5, 0x3f62832a,
0x3e863746, 0x3e699c84, 0x3eefa3f2, 0x3f2377ae, 0x3cb7d760, 0x3d092840,
0x3f024d7b, 0x3f66ed12, 0x3f27d1a8, 0x3e2e9848, 0x3f7b5743, 0x3f1c8caa,
0x3f355965, 0x3ef1a048, 0x3edc411a, 0x3f16c4b9, 0x3f0e6027, 0x3f5a8af9,
0x3e8ba48a, 0x3f4da2e7, 0x3f34c842, 0x3ead1d9a, 0x3dcb0c20, 0x3eaa8a30,
0x3f082b28, 0x3ee3d874, 0x3ed7ffe4, 0x3e9e102e, 0x3ede2698, 0x3f1fcd07,
0x3ec59d64, 0x3e9040d2, 0x3f6990d6, 0x3ee6a87e, 0x3f551534, 0x3f001e99,
0x3f5344d8, 0x3e9ad0bc, 0x3eb64558, 0x3f35d210, 0x3f0c8331, 0x3eed874e,
0x3f1dac4a, 0x3ea80004, 0x3f67a0b5, 0x3f7f3807, 0x3e53cd80, 0x3ee9d09c,
0x3e079ba8, 0x3f023023, 0x3ef7fdf2, 0x3e8fa2c4, 0x3e88f4a4, 0x3d15e2c0,
0x3f0055cb, 0x3e3edc90, 0x3f0dd291, 0x3c7efa80, 0x3f7e1978, 0x3e39a244,
0x3f63ad73, 0x3f6de737, 0x3e93b68a, 0x3e943ad4, 0x3ecb8976, 0x3f5b8b43,
0x3f2e4fbf, 0x3db65498, 0x3e28e8f4, 0x3f322b22, 0x3f11aea3, 0x3f7af29d,
0x3f4a4c62, 0x3f400888, 0x3e59df3c, 0x3f53e6a2, 0x3d4a6460, 0x3edcc86c,
0x3f2b89f4, 0x3eb6a5e4, 0x3dfa75c8, 0x3f55e704, 0x3f13b35c, 0x3f68712d,
0x3f424599, 0x3e6b005c, 0x3f4c60ae, 0x3edf3cb8, 0x3f582657, 0x3e52bb8c,
0x3f65c0e9, 0x3f04782b, 0x3f3df458, 0x3f113abd, 0x3f388998, 0x3f675761,
0x3ed4f14a, 0x3ecd2304, 0x3eefb426, 0x3f6c7beb, 0x3f710a5b, 0x3f46623c,
0x3f22e4e3, 0x3cc090e0, 0x3f3dc384, 0x3e53b8a0, 0x3f23c092, 0x3f270aff,
0x3f11881b, 0x3ef7a828, 0x3f312f67, 0x3f26f96f, 0x3f666e90, 0x3ef226a8,
0x3f71c6c5, 0x3f6234f2, 0x3f6cac00, 0x3e7ab140, 0x3e6965e4, 0x3ee6d92c,
0x3e6283b8, 0x3f5e2907, 0x3f70c9c0, 0x3edfc8f4, 0x3ee9d348, 0x3f3c36b2,
0x3dedf718, 0x3ef4d5e6, 0x3f79dee7, 0x3ef4f828, 0x3ef67c60, 0x3ed8525c,
0x3f280f4b, 0x3f608778, 0x3e96f1d6, 0x3f147649, 0x3dea3748, 0x3e37a768,
0x3f51ff66, 0x3f1d0a7f, 0x3f6e2bca, 0x3f53c73b, 0x3eb64b60, 0x3f501bb8,
0x3f373852, 0x3f5184b8, 0x3bc63c00, 0x3e93e44a, 0x3e288efc, 0x3e38ba08,
0x3f35fbbb, 0x3f5048b0, 0x3f2eee5d, 0x3edaaa46, 0x3f12a981, 0x3e874fd6,
0x3e16a03c, 0x3f45aefb, 0x3e4f9560, 0x3ea0a1fa, 0x3e583ad0, 0x3f7cc46b,
0x3f391376, 0x3f4a498a, 0x3f57f889, 0x3edafebc, 0x3f0cd50e, 0x3f2801ac,
0x3eecc4b4, 0x3d93a9b0, 0x3f1dc08b, 0x3f61a9f9, 0x3e46e06c, 0x3f39f92e,
0x3dfba050, 0x3f1f1350, 0x3d8276a0, 0x3f7e9125, 0x3f60a00d, 0x3f5f0c12,
0x3e558830, 0x3ed37dfc, 0x3f1d01bc, 0x3f1da1fc, 0x3f4d0476, 0x3e9ca804,
0x3f3c6250, 0x3f2228c0, 0x3e49d904, 0x3eacdd64, 0x3f5e3ab2, 0x3f7c8ff0,
0x3e33698c, 0x3e814d86, 0x3f70bf0b, 0x3f3dc425, 0x3f445f4d, 0x3e27dc04,
0x3ebee4bc, 0x3f7f0527, 0x3eb3740a, 0x3e48be44, 0x3deda760, 0x3f081c3a,
0x3f5e8c2d, 0x3eb45d9e, 0x3dbf1ed0, 0x3e3e09bc, 0x3e2a2c68, 0x3e0ebd48,
0x3f287380, 0x3f38519f, 0x3e8a551a, 0x3f5122b2, 0x3f397f94, 0x3dd3e600,
0x3f46edca, 0x3f6dbc40, 0x3f74de03, 0x3ebfedcc, 0x3e08fbe8, 0x3f0ed371,
0x3eb57908, 0x3da559f0, 0x3f1d54e0, 0x3a916000, 0x3f568f8e, 0x3f010d0f,
0x3b9cc000, 0x3e9fc0d4, 0x3f220e60, 0x3e3336e4, 0x3f367ad3, 0x3f52970e,
0x3f09b04c, 0x3dca8b40, 0x3f60d904, 0x3e6ef8d4, 0x3e04bf9c, 0x3df93a88,
0x3f5406b2, 0x3f6fcd2b, 0x3eb29bd8, 0x3f7db523, 0x3f2764da, 0x3f2cc8ea,
0x3e3f5ff4, 0x3e4ccfa4, 0x3f57f964, 0x3f171a31, 0x3f3ca691, 0x3ed90664,
0x3dbc1da8, 0x3f5f132f, 0x3f3c41a4, 0x3e9ba21c, 0x3f503055, 0x3f1b07bc,
0x3e9bb2fe, 0x3ec3c94a, 0x3f020dc1, 0x3f1fec28, 0x3d841d00, 0x3ecff25a,
0x3ee58bde, 0x3cb74980, 0x3f6f5ba1, 0x3f388085, 0x3ec092b0, 0x3f1c390a,
0x3f6ebccb, 0x3e664650, 0x3eef27f2, 0x3f14bfa6, 0x3ee8a0cc, 0x3f0065d0,
0x3f1f2548, 0x3edbbb54, 0x3e4c356c, 0x3f106c5c, 0x3ea1058e, 0x3d850760,
0x3f509fcb, 0x3f718560, 0x3b839c00, 0x3eb6e0b4, 0x3f0e8edb, 0x3d6aaea0,
0x3ef40940, 0x3cf20bc0, 0x3e20e9d8, 0x3f753841, 0x3dfa5240, 0x3ec00f88,
0x3e73107c, 0x3e7332e4, 0x3e3e6dac, 0x3f7233bb, 0x3f385e2e, 0x3b4f6800,
0x3f7f377d, 0x3f5a50a9, 0x3f2cf977, 0x3e36cb98, 0x3f421c56, 0x3ec57e14,
0x3f1a8b0f, 0x3ea27ea8, 0x3f353c2b, 0x3f308a75, 0x3f0d810d, 0x3f419fe4,
0x3dc3b938, 0x3e8fb798, 0x3ebd9bb4, 0x3f65c159, 0x3e8824da, 0x3f5e9eb6,
0x3f4948d5, 0x3f29f64a, 0x3f0af015, 0x3da556a0, 0x3f1a32af, 0x3f0eda76,
0x3f2aecee, 0x3e72c01c, 0x3f706328, 0x3e95d614, 0x3ef4523c, 0x3f19afb7,
0x3e6f10c8, 0x3e5c692c, 0x3ef7842c, 0x3e07d914, 0x3e0e2e08, 0x3f4749b4,
0x3f0a307b, 0x3f727e45, 0x3f482963, 0x3f58d43e, 0x3f7a85ef, 0x3ee3d7b4,
0x3c0ba340, 0x3eb9240c, 0x3f2c40ce, 0x3f7e9466, 0x3ce1c320, 0x3f0086ee,
0x3d9ab080, 0x3ecc54d2, 0x3f776ccb, 0x3ec39e70, 0x3f5d5768, 0x3eeded96,
0x3ede3db6, 0x3d82a150, 0x3f756354, 0x3e04d104, 0x3eb8d7f6, 0x3f490930,
0x3f7fcece, 0x3f15aedb, 0x3e1ad2dc, 0x3f49b470, 0x3f72bde7, 0x3f73bfb6,
0x3eb2c40e, 0x3ecc8ed8, 0x3e85e01e, 0x3f04b4bb, 0x3f7980ed, 0x3ee04240,
0x3f1ff613, 0x3f453cd9, 0x3e670308, 0x3f06966a, 0x3f207b74, 0x3f729c4b,
0x3cffe4c0, 0x3f108324, 0x3e9600a4, 0x3e82ce5a, 0x3ec994f6, 0x3f3fe22c,
0x3f4e94f3, 0x3f57e3f8, 0x3df9ba48, 0x3ea2a208, 0x3f572c4d, 0x3f62cb7f,
0x3eb9817c, 0x3f672d44, 0x3d9e86f0, 0x3f5ccda2, 0x3f5e79de, 0x3f20ef81,
0x3f33f733, 0x3e09f4ac, 0x3f547cd7, 0x3f2e4423, 0x3e01989c, 0x3ee78190,
0x3f7b02f9, 0x3ee02960, 0x3f39c136, 0x3ec77460, 0x3e1bb978, 0x3f629d46,
0x3eff370a, 0x3f439684, 0x3f6097b1, 0x3f027a4f, 0x3f706aad, 0x3f23a9be,
0x3f113c59, 0x3f69e97d, 0x3ed2ab70, 0x3f18c6a9, 0x3ecf9680, 0x3f010123,
0x3e12f558, 0x3f5c23eb, 0x3f3ec919, 0x3ec2cbd0, 0x3c922460, 0x3f7a6e4d,
0x3e555d10, 0x3f7b3a25, 0x3c4161c0, 0x3f6287b0, 0x3e47d0b8, 0x3eac0d38,
0x3ddd70f0, 0x3c9b68a0, 0x3f1d0ef6, 0x3f4738c3, 0x3ebc1506, 0x3f2d3f6a,
0x3e87be3e, 0x3ea3e890, 0x3ee21114, 0x3dce2b98, 0x3d07ead0, 0x3ee9c9b2,
0x3f032b58, 0x3edd9a9c, 0x3f1dfc57, 0x3f3d67cc, 0x3ea22e9a, 0x3eb85d7e,
0x3eed110c, 0x3f630656, 0x3ec4af4c, 0x3f472946, 0x3f29af7c, 0x3e23486c,
0x3f376384, 0x3f6e03d8, 0x3f336c44, 0x3dee7be0, 0x3e2ba424, 0x3f6d9ed2,
0x3e22b904, 0x3c49ae40, 0x3ebdf868, 0x3f00cc53, 0x3ed1d02e, 0x3bf9bf00,
0x3f6ea249, 0x3ec91796, 0x3ea9c620, 0x3f1e9ff6, 0x3f1eb6c8, 0x3f21c816,
0x3d24dec0, 0x3e6eee2c, 0x3ee6ef9a, 0x3ee1aa84, 0x3f77ec2d, 0x3f492f6a,
0x3f34cbd0, 0x3e24cc90, 0x3ed787e2, 0x3f112d80, 0x3e89a4de, 0x3c542280,
0x3ec63388, 0x3e24d3d8, 0x3e49a7f8, 0x3f21fe49, 0x3f53e9ba, 0x3f7d9a39,
0x3f088ec4, 0x3d9867f8, 0x3f79bb20, 0x3d852288, 0x3ed24420, 0x3e0f725c,
0x3db85c00, 0x3f5f4bc9, 0x3f5ba8c7, 0x3e01f854, 0x3f419dbc, 0x3f3e906b,
0x3f0cd816, 0x3ef6765e, 0x3f52b96e, 0x3f31a547, 0x3f703a25, 0x3f389339,
0x3deb1c60, 0x3f57e834, 0x3e8c26c2, 0x3f45feb1, 0x3dd6fb30, 0x3f037d1e,
0x3d9b0c90, 0x3e057c9c, 0x3f599f1f, 0x3e0c762c, 0x3f76a82d, 0x3f24342e,
0x3eb22cc6, 0x3f519440, 0x3e18b588, 0x3f586054, 0x3e5475d4, 0x3f3ab683,
0x3e267038, 0x3ef4188a, 0x3f5cf39c, 0x3ed45ff0, 0x3d62cf90, 0x3f588506,
0x3ea692da, 0x3f5ee54f, 0x3ee416da, 0x3f56b3f2, 0x3e951744, 0x3ef06706,
0x3f664f2d, 0x3f3a3575, 0x3ed6c5fc, 0x3eac3316, 0x3ebd8dd8, 0x3f0f22e4,
0x3f174bd5, 0x3ee9f2ae, 0x3e721ae8, 0x3f2835cf, 0x3f118f07, 0x3f0b0cd0,
0x3f262426, 0x3f4a4512, 0x3f02b1ad, 0x3f60fcbe, 0x3ea682f4, 0x3f30fc50,
0x3f305809, 0x3eabdaf2, 0x3eff5ea0, 0x3f423fd7, 0x3b88ef80, 0x3f2a4423,
0x3f1d6e73, 0x3f143475, 0x3ef5d70e, 0x3cca67e0, 0x3f1e6546, 0x3f385b79,
0x3f35ab29, 0x3f122ab6, 0x3f49c5b6, 0x3ec25e9e, 0x3e039dac, 0x3ebad9f8,
0x3f5e02b0, 0x3f024f09, 0x3da6eed0, 0x3d9e6bb0, 0x3ec78492, 0x3f2d84d2,
0x3f0d85f5, 0x3ee70d2c, 0x3e345bcc, 0x3f1af613, 0x3f3fed1b, 0x3ecd5254,
0x3f4e0343, 0x3f695429, 0x3e8bb032, 0x3f018a66, 0x3f12afca, 0x3f7bec67,
0x3f39960d, 0x3f10af91, 0x3d8e22b8, 0x3f351413, 0x3eb0020e, 0x3ad9fe00,
0x3edc1bee, 0x3e28de40, 0x3e6b1818, 0x3ea3b442, 0x3e016fac, 0x3ed68ad2,
0x3e99569c, 0x3f49137a, 0x3f55621e, 0x3edce8de, 0x3f7f6758, 0x3ed778de,
0x3f7f0637, 0x3e2ae554, 0x3f202c8b, 0x3f70d27f, 0x3e12963c, 0x3f340dea,
0x3f76765a, 0x3f29672b, 0x3e74f948, 0x3ec404a8, 0x3f469499, 0x3e6c3cd8,
0x3e4a7b58, 0x3f78f6c8, 0x3ece42bc, 0x3f7addc1, 0x3f4fd36f, 0x3ce962c0,
0x3f5417df, 0x3f3fff09, 0x3e6101f0, 0x3ee25768, 0x3f21652c, 0x3d82b0f8,
0x3eef5cfc, 0x3dda8d90, 0x3ea0d042, 0x3f25d9df, 0x3f3f94de, 0x3f6a0f1b,
0x3e8608c8, 0x3eddeadc, 0x3f42b07d, 0x3e98b09a, 0x3f1ceb72, 0x3f739382,
0x3f0004ac, 0x3ea38fc6, 0x3f5f92cf, 0x3f5dd3ed, 0x3f3c1247, 0x3e0960d4,
0x3f17b78e, 0x3eeafc04, 0x3e762f94, 0x3f6d5945, 0x3f3702a2, 0x3e9f64e2,
0x3eeb56ca, 0x3f494bbf, 0x3c81b540, 0x3f6de5b1, 0x3dd375e0, 0x3f274922,
0x3e871a1a, 0x3f307c6a, 0x3f5c5c35, 0x3e5f5ca8, 0x3e63b024, 0x3f49ce50,
0x3f47ecbe, 0x3f0d0f5d, 0x3e871360, 0x3dedf9c0, 0x3f5057e6, 0x3eded724,
0x3f34c89b, 0x3f2068e8, 0x3eaf5e08, 0x3d902850, 0x3e595034, 0x3f3278a9,
0x3f31579b, 0x3f20089d, 0x3f3263be, 0x3f262b0a, 0x3e057894, 0x3dce3d30,
0x3ef6cfe0, 0x3d9413c0, 0x3f209b96, 0x3f7a0cb1, 0x3f718876, 0x3e5a9b0c,
0x3f1f45f7, 0x3ec7e492, 0x3f0c2fcd, 0x3e840b70, 0x3f546fb5, 0x3f600fa1,
0x3ea64ede, 0x3f120f60, 0x3f7ef5c7, 0x3eba7854, 0x3f2cf1bd, 0x3f1ef247,
0x3e309b54, 0x3de4fe28, 0x3dba0a50, 0x3ec256fa, 0x3f07e755, 0x3ece9524,
0x3e255ffc, 0x3ec766c8, 0x3e9dbbd6, 0x3cdc3500, 0x3f200393, 0x3f71f2f3,
0x3f6499b1, 0x3f470399, 0x3eef0260, 0x3f502ece, 0x3e6ab1e8, 0x3f61eef3,
0x3ea1c83a, 0x3e2c60a8, 0x3ecae280, 0x3f01aee0, 0x3f3d6ed0, 0x3f448546,
0x3e459bb4, 0x3f6edde2, 0x3f7d07c3, 0x3f4f7933, 0x3f23e0eb, 0x3e2d0418,
0x3e4fcec8, 0x3f35e581, 0x3f08ab57, 0x3e9397e2, 0x3ee99216, 0x3e98b25c,
0x3f7df9ca, 0x3df3e7d8, 0x3f1f190c, 0x3e50ec10, 0x3ec4b8b2, 0x3efb67ae,
0x3ec1f562, 0x3f7ff33b, 0x3ec77a62, 0x3e33b8d0, 0x3eff590e, 0x3f4408d8,
0x3e4eb4a8, 0x3eafb0d2, 0x3f166ea0, 0x3c44d680, 0x3eec12c4, 0x3efeed80,
0x3e84fa62, 0x3f59854e, 0x3e468548, 0x3cbd9500, 0x3e4082c8, 0x3f24a064,
0x3f72db6a, 0x3f35d480, 0x3f279af7, 0x3f5d2516, 0x3f63b61a, 0x3f6f14a3,
0x3ef771c2, 0x3e54ac3c, 0x3ebad670, 0x3e9d4f0c, 0x3ed954ba, 0x3f2a1382,
0x3f478ce4, 0x3e431f24, 0x3e37db34, 0x3f48fbb0, 0x3e526f24, 0x3f08827c,
0x3f42d461, 0x3f2c2877, 0x3f7846d1, 0x3f54c2bf, 0x3f737489, 0x3f5ce33c,
0x3f388597, 0x3f6d26e0, 0x3f0cf819, 0x3ead938c, 0x3f64a4b6, 0x3da59d88,
0x3f5bb7b7, 0x3ec3531a, 0x3f2c7d1f, 0x3e7817c0, 0x3f40fdb2, 0x3eca7f2a,
0x3dad5650, 0x3f451750, 0x3eb5663c, 0x3e4113e0, 0x3f27640a, 0x3f590f3e,
0x3f47f6f7, 0x3f761643, 0x3f0a6118, 0x3f429106, 0x3e388134, 0x3def1fa0,
0x3f5f1956, 0x3eb708cc, 0x3dce13b0, 0x3f71fb6d, 0x3f2d06f2, 0x3eef075c,
0x3f42109d, 0x3f7c6db5, 0x3f10916f, 0x3f258d7d, 0x3f6144eb, 0x3e859e92,
0x3e7dfa08, 0x3ea6d616, 0x3edfcbfa, 0x3e96d04e, 0x3f3860ee, 0x3ebc752a,
0x3e8c31f8, 0x3f0182ed, 0x3ed46c60, 0x3f595434, 0x3ea8ac68, 0x3f676640,
0x3c3af8c0, 0x3f5e2d50, 0x3f28fb88, 0x3f7621f0, 0x3f01d8b4, 0x3f02033b,
0x3d55bfe0, 0x3e7976e4, 0x3e115b50, 0x3efd8986, 0x3d1fdb10, 0x3ef19500,
0x3f09c718, 0x3f4d05ce, 0x3f4be92f, 0x3f1f75f6, 0x3d2dad70, 0x3eaeba52,
0x3f24ea89, 0x3ef8bce2, 0x3f3b2776, 0x3ef9a9ae, 0x3f4ab469, 0x3f3d4feb,
0x3f4052e2, 0x3f21b064, 0x3f0f8001, 0x3ecfe7e4, 0x3eb79db8, 0x3ed0967c,
0x3f06e2a0, 0x3e1962f8, 0x3f464dcb, 0x3f0c8942, 0x3f7d604d, 0x3e2c508c,
0x3e6ef3a4, 0x3ed3f7ca, 0x3e10c3e8, 0x3f6f9f37, 0x3eebbaac, 0x3f15cef3,
0x3ef75d62, 0x3e997036, 0x3f709b61, 0x3f4b4305, 0x3f5955f7, 0x3f1c7f75,
0x3f0eacd7, 0x3f72cec1, 0x3ebf8114, 0x3f0c8ed3, 0x3f4742ee, 0x3f464d9f,
0x3f75dcfe, 0x3e2c5308, 0x3e5945e8, 0x3eb73a0a, 0x3e758788, 0x3eee1b88,
0x3f2e979f, 0x3f5f59c1, 0x3f012723, 0x3e8213e6, 0x3efe2ed4, 0x3f1e4948,
0x3f11a540, 0x3f2c9681, 0x3f047e2b, 0x3f148f4b, 0x3ef172c6, 0x3f617cf0,
0x3ee21d74, 0x3f5a87a4, 0x3f1c876a, 0x3d98f4e0, 0x3d2e0570, 0x3ea43b50,
0x3f3e95c5, 0x3e608390, 0x3edf1b0e, 0x3ed9c0ac, 0x3f0516ce, 0x3f6532c6,
0x3f5d2091, 0x3ea40344, 0x3f21a37e, 0x3f7adb96, 0x3c20e5c0, 0x3e227b3c,
0x3f11773c, 0x3d519c50, 0x3f1c6945, 0x3e11dbe4, 0x3f0d3b70, 0x3e8804e8,
0x3dfc17f8, 0x3f669265, 0x3edb721c, 0x3eafdb3c, 0x3f5bc025, 0x3f4724fc,
0x3f7905ff, 0x3ecc3dc6, 0x3e2f1a60, 0x3e62e3a0, 0x3eb7ea3c, 0x3f1b97e4,
0x3ed44ac6, 0x3f5184ce, 0x3d09b140, 0x3ee1a34a, 0x3f748cae, 0x3f763296,
0x3d5cb6e0, 0x3e9bb7f0, 0x3f6e5e2a, 0x3eeb54ec, 0x3eb429be, 0x3e64c554,
0x3e0195d0, 0x3f6eb90a, 0x3e6dce14, 0x3e9437cc, 0x3f3407a3, 0x3ec0175a,
0x3d7a6b40, 0x3f195874, 0x3ec5015e, 0x3e1c1db8, 0x3ef41d56, 0x3ecb2b26,
0x3f2c8782, 0x3f1efa48, 0x3f40ca3d, 0x3f75800f, 0x3f334287, 0x3f4d3838,
0x3f63d41c, 0x3f6a4a13, 0x3e4f668c, 0x3f1df918, 0x3f66fe12, 0x3eafd92c,
0x3e1da3fc, 0x3eb57c2c, 0x3e5e673c, 0x3e193e68, 0x3db898f8, 0x3f74ea47,
0x3e941964, 0x3f24994a, 0x3f0f8cc0, 0x3ecd12ca, 0x3f535c86, 0x3eb28f96,
0x3f031a2b, 0x3e6ac940, 0x3f03c80f, 0x3f1a73a1, 0x3f1df14f, 0x3f3e98f6,
0x3f70c78e, 0x3f268bdd, 0x3f7998c6, 0x3ee4e72a, 0x3e00818c, 0x3ecc361e,
0x3eaeb8a0, 0x3f1c39bb, 0x3bab2f00, 0x3f17ad32, 0x3f47d9b9, 0x3f6512cb,
0x3f3dd5dc, 0x3f129b36, 0x3ef6e4ec, 0x3f130246, 0x3f400fe5, 0x3f4cecae,
0x3f533548, 0x3ef5ccf0, 0x3e1911d8, 0x3e2ee04c, 0x3f1c197f, 0x3f2b6231,
0x3e9ace5e, 0x3f3e1eaf, 0x3f6ac40d, 0x3f13fd09, 0x3f03eb36, 0x3f0b0e09,
0x3e2bf2fc, 0x3ea72f8e, 0x3e845bc6, 0x3f65f06c, 0x3e6c6440, 0x3ed45fc0,
0x3e99bdf2, 0x3e62ea74, 0x3f503c08, 0x3f5c98d4, 0x37760000, 0x3edabe46,
0x3f69fcc7, 0x3ebe0d90, 0x3dccd1a0, 0x3e1df22c, 0x3dae55e8, 0x3f3c2eb1,
0x3f50f0f1, 0x3f379a5a, 0x3f7bd5df, 0x3e90ea36, 0x3f69acab, 0x3ea7fd44,
0x3f530572, 0x3d8696f0, 0x3e147390, 0x3eb72930, 0x3f3c68c1, 0x3f7f6004,
0x3f0871dc, 0x3e8f7f80, 0x3ed03f4a, 0x3f27246a, 0x3e1fde98, 0x3f4bd5ca,
0x3f3d3117, 0x3e018b30, 0x3ebf3038, 0x3ec2f070, 0x3f3796c9, 0x3f5c0430,
0x3ed1e610, 0x3f547dc5, 0x3ef7b59a, 0x3e8e159e, 0x3f6ba379, 0x3f4539b2,
0x3f5237cc, 0x3e563bd8, 0x3f3fcef1, 0x3ea22640, 0x3f1b5c43, 0x3e18c0d4,
0x3d9429a8, 0x3f68b0f0, 0x3dd51f88, 0x3f154c7c, 0x3ea7ebae, 0x3f496013,
0x3f7e4d14, 0x3e179958, 0x3f2399cd, 0x3edd1a5a, 0x3eddec8a, 0x3f7a5a26,
0x3f27b67a, 0x3ea973d4, 0x3f46095d, 0x3f00956f, 0x3f077540, 0x3ec270b0,
0x3f192a75, 0x3e9d0352, 0x3f40a28f, 0x3e9436ce, 0x3f291746, 0x3f042d5d,
0x3e983db2, 0x3f315562, 0x3e9644dc, 0x3f4e2e54, 0x3db32a10, 0x3f0d8a75,
0x3e19ea78, 0x3f76b767, 0x3f0571eb, 0x3f0cff22, 0x3d00cc70, 0x3e94c912,
0x3efac0be, 0x3f2ef1d9, 0x3ce83380, 0x3f7f51c3, 0x3f197e77, 0x3f089771,
0x3e0e7a54, 0x3f08b53a, 0x3da52488, 0x3e474f88, 0x3e7d6fc4, 0x3e0af4f8,
0x3f0e389e, 0x3f2fc5cc, 0x3e125a90, 0x3e50ef88, 0x3f343717, 0x3f282bb7,
0x3f3567c3, 0x3dc3d358, 0x3f37dc10, 0x3e825f14, 0x3f3b96f0, 0x3f487608,
0x3f6f7596, 0x3f1b5452, 0x3f69697b, 0x3f7ed8c8, 0x3f233160, 0x3ea0a56e,
0x3da6f468, 0x3e24c4ac, 0x3e914dea, 0x3ebd8b8a, 0x3f6d1064, 0x3f5c98d4,
0x3e68ffc0, 0x3e4b04b0, 0x3f5a1db2, 0x3e9ab326, 0x3e1218fc, 0x3dd02ba8,
0x3f44866b, 0x3e51c74c, 0x3ebd1708, 0x3f7227b7, 0x3f64907a, 0x3ee79d14,
0x3f777af7, 0x3f771175, 0x3ef268d6, 0x3f326ec2, 0x3e8dbe4e, 0x3f75b929,
0x3f375dc2, 0x3f2195da, 0x3d51ccc0, 0x3f2d8eb5, 0x3f689246, 0x3f57db6e,
0x3e5ddc70, 0x3e437958, 0x3ea17d9e, 0x3f5d2337, 0x3da0b840, 0x3ebbee8a,
0x3ea81168, 0x3ef0c596, 0x3ec6c514, 0x3e422e20, 0x3f40c452, 0x3f2bd24e,
0x3e442174, 0x3c9587a0, 0x3ed7ec3e, 0x3f3429fb, 0x3f6dfcf8, 0x3ebf84b2,
0x3f4a9022, 0x3f23dafd, 0x3f774fee, 0x3f0819bd, 0x3f6a9486, 0x3f44cb26,
0x3d82f4c8, 0x3f357278, 0x3f56480b, 0x3ecc3076, 0x3ee6a840, 0x3f591675,
0x3da4caa8, 0x3f718358, 0x3f2f435c, 0x3f08fcb0, 0x3f6680ef, 0x3f39e1ae,
0x3f6e36b5, 0x3f314f87, 0x3f12309a, 0x3e0262b0, 0x3ee36666, 0x3f1af004,
0x3da00238, 0x3d93b0b0, 0x3f007353, 0x3e7a2bd8, 0x3f1d6c7f, 0x3e193548,
0x3e887c04, 0x3ecc8254, 0x3ece8006, 0x3eb2aa5a, 0x3e9b381c, 0x3f39d3ca,
0x3ebe38de, 0x3f75894c, 0x3f49b065, 0x3f270eaa, 0x3f7ed96a, 0x3e6ed5dc,
0x3f4c2350, 0x3e9635ea, 0x3de0b218, 0x3e9b0c62, 0x3ea4223e, 0x3f7df066,
0x3f7bb910, 0x3f5b64a4, 0x3e13dda0, 0x3e113c14, 0x3db82f18, 0x3e912f40,
0x3e4f07b4, 0x3f26b861, 0x3dbeda70, 0x3f37c8de, 0x3f0ac7fa, 0x3e979360,
0x3dfff0f0, 0x3f4fd16a, 0x3ea211aa, 0x3e5fc338, 0x3dd4b6c8, 0x3f6be5a1,
0x3f1ddc23, 0x3f500bab, 0x3e86441c, 0x3f402a0c, 0x3c4f3cc0, 0x3e8e86bc,
0x3f24c881, 0x3e67bcf0, 0x3cf3b500, 0x3f6f5123, 0x3f35378c, 0x3f12fdd1,
0x3f4b46a9, 0x3f34d642, 0x3f618818, 0x3ecb9592, 0x3f7a0d98, 0x3e297414,
0x3f49405a, 0x3f1941c8, 0x3f60ed84, 0x3f16fd3a, 0x3f7cc034, 0x3db31428,
0x3e3d6ac0, 0x3e04cb88, 0x3e7b7324, 0x3e93eb60, 0x3f794bef, 0x3f0c8e33,
0x3f19ebb9, 0x3f7f787c, 0x3d9427d8, 0x3f490a6b, 0x3b96de80, 0x3eb57cdc,
0x3e4325e0, 0x3f2ca37d, 0x3ed3a328, 0x3eb401d6, 0x3f6f8932, 0x3f4eab48,
0x3f59202a, 0x3d749bf0, 0x3f727c17, 0x3f6db717, 0x3ebe009c, 0x3e841b4e,
0x3f0eb52a, 0x3e8021be, 0x3f196472, 0x3f764abd, 0x3f1a2d89, 0x3f2be68f,
0x3f54858e, 0x3f0ff02c, 0x3e340fe4, 0x3f2185cd, 0x3f7cb696, 0x3f26a29f,
0x3eafc8f0, 0x3d90c708, 0x3edaf2e0, 0x3f2521cc, 0x3ef49a98, 0x3f58dcb6,
0x3ea7d180, 0x3e1bb02c, 0x3f6a1aad, 0x3eb394a2, 0x3f301c25, 0x3f520bd8,
0x3e07aeb4, 0x3f0b853a, 0x3f7984f2, 0x3f497a48, 0x3f314461, 0x3e325f8c,
0x3d051e70, 0x3ded2620, 0x3ec6e40e, 0x3d518140, 0x3f7649c4, 0x3f1936a4,
0x3f51b52c, 0x3ee85672, 0x3e944a30, 0x3f1540d1, 0x3f18997e, 0x3ba31200,
0x3f552ab5, 0x3f7b8b95, 0x3ec389ba, 0x3f642864, 0x3ecfd75a, 0x3f2fb0f3,
0x3f5da11b, 0x3e56fed4, 0x3eebb204, 0x3f6e77d9, 0x3f16f32e, 0x3f3f9efa,
0x3d98b068, 0x3f37a106, 0x3f6f2d74, 0x3e820e62, 0x3f50fc13, 0x3f0fa747,
0x3f3f2ca1, 0x3da95338, 0x3e06afc8, 0x3e9512ea, 0x3da86140, 0x3f51e6d0,
0x3ad07e00, 0x3efbf832, 0x3dc26948, 0x3e8d6f54, 0x3e8ede88, 0x3f79a642,
0x3f331cd7, 0x3f700323, 0x3ef31d8e, 0x3f78050a, 0x3f71aa8a, 0x3e4414c8,
0x3f6f6c5f, 0x3e064450, 0x3d210320, 0x3e1c088c, 0x3f0a410b, 0x3ed78a88,
0x3e1d6094, 0x3d4ce330, 0x3f7e5f07, 0x3f63d241, 0x3f4c3674, 0x3dc16bc8,
0x3df80890, 0x3edabcee, 0x3eb94008, 0x3da77048, 0x3e9e736e, 0x3ee674e4,
0x3df7be78, 0x3ef96414, 0x3f09281f, 0x3e2c02b8, 0x3e74362c, 0x3f2e0ee1,
0x3f01ff5a, 0x3f3de5d5, 0x3d8127b8, 0x3f5b9b46, 0x3f7b98f1, 0x3e993f08,
0x3f089379, 0x3f03021d, 0x3e32cabc, 0x3f19790e, 0x3cdba020, 0x3ddea6f8,
0x3f356643, 0x3f76f320, 0x3f51fa9b, 0x3e5c9b1c, 0x3eb22fca, 0x3e0a7a20,
0x3ed8e80e, 0x3f51cdf9, 0x3e9b8816, 0x3ec376ac, 0x3ec8e688, 0x3f04a40b,
0x3f3ab4b5, 0x3e94d3c4, 0x3db22238, 0x3c023640, 0x3dbb4a48, 0x3f679fb9,
0x3e8d1bf6, 0x3e420610, 0x3e1b9004, 0x3f16b085, 0x3f6a6b6e, 0x3f2a1907,
0x3e39c22c, 0x3f576cf2, 0x3e3b22b4, 0x3f152e47, 0x3d638720, 0x3cbcc3a0,
0x3ef31bc2, 0x3f1fca54, 0x3f451b37, 0x3e4b8b68, 0x3f23c45b, 0x3f248fcf,
0x3f75e088, 0x3f04151e, 0x3f4e1756, 0x3f42adec, 0x3f6571ef, 0x3f322735,
0x3f3c11c4, 0x3ede675c, 0x3f0e2a15, 0x3eb09402, 0x3e3b6b7c, 0x3c472800,
0x3d898160, 0x3f39b80e, 0x3f480a1e, 0x3f5deff4, 0x3e55e4d0, 0x3f21db91,
0x3ee97c30, 0x3e8f32f8, 0x3f5bb475, 0x3dbda638, 0x3f16f12e, 0x3f52ed8d,
0x3f0257ca, 0x3f544fdb, 0x3f280268, 0x3e9a594a, 0x3ee546e8, 0x3e1c2810,
0x3ec87438, 0x3f5fb336, 0x3f03501b, 0x3f2b7924, 0x3f22f754, 0x3d4aea80,
0x3f02ec34, 0x3f7f79e5, 0x3f25fc9c, 0x3c08a280, 0x3dcb3898, 0x3e7b0694,
0x3f0afae0, 0x3f3f0133, 0x3f768fbc, 0x3ddf8810, 0x3d6f1a50, 0x3f13fe60,
0x3ea37c1c, 0x3f1f9dcd, 0x3f40042f, 0x3dc8cb58, 0x3f712d9b, 0x3f2302c8,
0x3ef349a6, 0x3e61a508, 0x3f6d73d4, 0x3f355fca, 0x3e30d0cc, 0x3ef5a774,
0x3f2d61c2, 0x3f5efa8a, 0x3f705611, 0x3edc25bc, 0x3f190445, 0x3f0350b8,
0x3e40d918, 0x3e916348, 0x3ec95924, 0x3f52e13c, 0x3ef55e82, 0x3eb2f05c,
0x3ea39b5e, 0x3f242856, 0x3f3284b4, 0x3e94b6fa, 0x3e9fd99c, 0x3f684583,
0x3d4b4f00, 0x3ee81350, 0x3ef81ac2, 0x3f0faca6, 0x3ef5fe10, 0x3f53eda8,
0x3f7f633f, 0x3e54deec, 0x3f5d2e90, 0x3f14fe52, 0x3e9fe104, 0x3d0178c0,
0x3e98eb60, 0x3f2444c8, 0x3d957178, 0x3f6bedcb, 0x3b8f6800, 0x3eb3c32a,
0x3efcfd92, 0x3ee36d4e, 0x3ec223d0, 0x3ef54b3e, 0x3de4dc98, 0x3ef31280,
0x3f776a3f, 0x3e0853a8, 0x3f6c49fb, 0x3e53d250, 0x3ebe0a70, 0x3f359f3b,
0x3eafb468, 0x3e6b2b74, 0x3f02f05c, 0x3ecdf21c, 0x3d4ba150, 0x3eb68e26,
0x3f2686fd, 0x3f19725c, 0x3f2a08d3, 0x3eeef34e, 0x3f495faf, 0x3f735e74,
0x3e73594c, 0x3d8eb898, 0x3d8283b8, 0x3ecdba8a, 0x3e845460, 0x3ef0f7c0,
0x3f086c16, 0x3e89c828, 0x3ea2e1be, 0x3edf4e2c, 0x3e295d7c, 0x3f7152ad,
0x3f0002ea, 0x3f2e07dc, 0x3d9c6f88, 0x3eb81618, 0x3eee1092, 0x3e15a800,
0x3e6d6918, 0x3e594614, 0x3f26ea9e, 0x3f3a22f5, 0x3da12e30, 0x3f54fb12,
0x3d1c9100, 0x3f75dcac, 0x3f1702c1, 0x3cbffaa0, 0x3f5c777d, 0x3f246abf,
0x3d8de358, 0x3e4e8564, 0x3c0ec480, 0x3f2f3d52, 0x3f4f0720, 0x3f5869b7,
0x3d8d5068, 0x3f0ff831, 0x3ebf6a60, 0x3e356d04, 0x3f1b0841, 0x3f2a51d1,
0x3f48756f, 0x3e902698, 0x3eba6c6e, 0x3f789b74, 0x3dfe4558, 0x3ce61320,
0x3f192210, 0x3f55bdb5, 0x3e12caf0, 0x3f0051a6, 0x3f449833, 0x3f25bab1,
0x3f490b43, 0x3f227569, 0x3f786323, 0x3f0c3697, 0x3dcf02e8, 0x3e831d0a,
0x3ed29ea0, 0x3f396ad5, 0x3f2039f1, 0x3e107048, 0x3f666b9c, 0x3ea8e710,
0x3eaafe84, 0x3f0b19e0, 0x3e727ff8, 0x3ec4a136, 0x3ec690bc, 0x3db7b210,
0x3f4ba9b6, 0x3f70667d, 0x3f59ce18, 0x3f53bd30, 0x3ef17d8c, 0x3f207066,
0x3dec0188, 0x3f6eaeb3, 0x3ea4d9a2, 0x3f3e6334, 0x3d62ce10, 0x3ec38d8e,
0x3e6c502c, 0x3eec5906, 0x3f440057, 0x3e28bcd0, 0x3f29cfe6, 0x3edc2074,
0x3f718e70, 0x3c7b0c80, 0x3ef89c8e, 0x3f66bdf1, 0x3ebfdf80, 0x3df57108,
0x3e51b634, 0x3f7e035b, 0x3e5233dc, 0x3ee5b03a, 0x3ed29e72, 0x3f688ef0,
0x3ee63522, 0x3e7b6be4, 0x3f32fb4e, 0x3f68a697, 0x3eaea96a, 0x3f615116,
0x3f0965aa, 0x3e7e3d74, 0x3ca56180, 0x3f0929c4, 0x3ea08428, 0x3e9a6712,
0x3f3b5687, 0x3f3f1e73, 0x3f0ece49, 0x3e9d3b28, 0x3f10666d, 0x3f027f65,
0x3ef73f7a, 0x3f0d5c89, 0x3f7f0a14, 0x3f12fdaf, 0x3f1fecbd, 0x3dc6ca98,
0x3f01277f, 0x3e443e84, 0x3f67d3ec, 0x3f48fae8, 0x3f5ad5fa, 0x3eb0656c,
0x3e6a58dc, 0x3f0bd847, 0x3f057ab0, 0x3ca21960, 0x3ed907c2, 0x3f5b3862,
0x3f1fedb1, 0x3f4d2730, 0x3edfab0c, 0x3e81717c, 0x3f4b6bc1, 0x3f53697c,
0x3f2ba5cf, 0x3ea23ff8, 0x3ebd7fa0, 0x3f5a0f85, 0x3c5d5980, 0x3f279fd2,
0x3f27bad6, 0x3e306e0c, 0x3f448674, 0x3f0b90af, 0x3f1d7452, 0x3e453514,
0x3e1ba2e8, 0x3f2b871b, 0x3f07d16e, 0x3e7b9008, 0x3f04f1cf, 0x3f3c6147,
0x3d9c4208, 0x3e7ecff4, 0x3f671413, 0x3eec57aa, 0x3ebcbd72, 0x3eb239a4,
0x3ea43872, 0x3dbc7008, 0x3e5c5784, 0x3f449f13, 0x3f2ebe92, 0x3ee73112,
0x3f03f3cb, 0x3f4ea556, 0x3df7d078, 0x3f03f37a, 0x3ef82142, 0x3f1619d0,
0x3ed6933e, 0x3f33b79e, 0x3eaa2022, 0x3dc7ef58, 0x3ee5e184, 0x3d5be810,
0x3da4ed10, 0x3e5fa068, 0x3f588973, 0x3c3b4740, 0x3f77de66, 0x3ef4d7bc,
0x3ee91b02, 0x3ee58602, 0x3f5222ed, 0x3f70cbe0, 0x3d60d810, 0x3e88ceb2,
0x3f534e68, 0x3e1a306c, 0x3f458c71, 0x3eb8ca22, 0x3e442da8, 0x3e82504a,
0x3edc88a8, 0x3f21da37, 0x3e115da0, 0x3e01f7ec, 0x3e9c3eb6, 0x3f1bfd38,
0x3f780f95, 0x3e404f90, 0x3f7e7462, 0x3ed94262, 0x3f6f9ccf, 0x3eb7fd7a,
0x3f6b0471, 0x3d44c3f0, 0x3f50ae5d, 0x3f34d178, 0x3ead6a08, 0x3f27a4b6,
0x3e914308, 0x3e90cc92, 0x3d1debe0, 0x3f61c1b9, 0x3f134823, 0x3f2a814d,
0x3eeedd64, 0x3f1b15c8, 0x3f31a1ba, 0x3e4437f8, 0x3ecc2796, 0x3ce41b20,
0x3f002597, 0x3f39a677, 0x3f55c778, 0x3f47d34e, 0x3f543983, 0x3f05609f,
0x3f2bb2d6, 0x3daec9a8, 0x3ed9beae, 0x3e9859c2, 0x3f51ae0b, 0x3f6fce44,
0x3f4566e2, 0x3e913e3e, 0x3f206163, 0x3f1ad23e, 0x3e22e3c8, 0x3e417f88,
0x3f79ad00, 0x3f19f81d, 0x3ed73a18, 0x3eea5daa, 0x3f0b5bdb, 0x3f40cff4,
0x3f6f8603, 0x3ec2c82e, 0x3f5c4bb2, 0x3e08c228, 0x3ca40d80, 0x3d450fe0,
0x3eb59a7a, 0x3f3f3c4a, 0x3f617c07, 0x3ecf6f26, 0x3e913fbc, 0x3f2d4a48,
0x3f3ababf, 0x3e2afc54, 0x3eda0a2e, 0x3d51dd00, 0x3e02f2d4, 0x3e0d2200,
0x3e88f5a6, 0x3ebb2084, 0x3f64dc94, 0x3d86c5f0, 0x3eb21452, 0x3f63498b,
0x3f319c3b, 0x3eb7b504, 0x3f1e95c3, 0x3e949938, 0x3effa1e4, 0x3de08088,
0x3f4a7323, 0x3e7d6334, 0x3ebf5070, 0x3ea2d94c, 0x3ea66a3c, 0x3f04a629,
0x3f7015cb, 0x3f2eb99a, 0x3f398026, 0x3e529a40, 0x3e9c4dde, 0x3f772f27,
0x3f21f326, 0x3dded688, 0x3f77cb6f, 0x3f2b44a8, 0x3eadb7dc, 0x3f3e617b,
0x3f199ddb, 0x3f7eedbe, 0x3f1bdf85, 0x3ed3f304, 0x3f13228e, 0x3ed8bd6a,
0x3f1a097e, 0x3f32b1e2, 0x3f18417f, 0x3f68e58c, 0x3db6e728, 0x3f38846f,
0x3e628ecc, 0x3f063374, 0x3f6076f0, 0x3f673242, 0x3f67ca8e, 0x3f1f410b,
0x3f4ae052, 0x3f5e3eea, 0x3e957e5a, 0x3ee74f20, 0x3e89cc60, 0x3eb12bba,
0x3e84cbb0, 0x3efd8618, 0x3f4f8b28, 0x3ed1d7ca, 0x3efb6f0e, 0x3ebd8642,
0x3f7b414a, 0x3c8b5780, 0x3f277100, 0x3efcffaa, 0x3dffb6d0, 0x3eb2fe7a,
0x3f406b21, 0x3e8bdc72, 0x3f0455ad, 0x3ef692cc, 0x3d683510, 0x3d1d6250,
0x3ef59622, 0x3f477071, 0x3f58f999, 0x3f20c9a7, 0x3e40e780, 0x3e341ffc,
0x3f085ed3, 0x3ed67fe4, 0x3db2b568, 0x3ee90dfe, 0x3da8dee0, 0x3eee5dca,
0x3c9905e0, 0x3ec8013c, 0x3ebd557a, 0x3f6fd089, 0x3c09da80, 0x3f43fe75,
0x3d931318, 0x3e2be0ac, 0x3f7c5cfb, 0x3f165119, 0x3f570b66, 0x3dca4978,
0x3a9e6400, 0x3f21fa05, 0x3ed49f6a, 0x3f1d92b0, 0x3f7b7b36, 0x3f5c4291,
0x3f2cc8b5, 0x3e87901c, 0x3eb2a546, 0x3dbc1e70, 0x3e4bf77c, 0x3f726fa6,
0x3e5c56cc, 0x3e1ece54, 0x3e172bc0, 0x3ce629a0, 0x3f71d461, 0x3e62af64,
0x3e911fbe, 0x3e8ee43a, 0x3bb3c580, 0x3f06e073, 0x3f0f2e42, 0x3f373e46,
0x3eb56d8e, 0x3c9837e0, 0x3f3223cd, 0x3f552bb2, 0x3f79360c, 0x3f55cc2a,
0x3f49cfe7, 0x3f24dfb5, 0x3ed5161c, 0x3eb6a25c, 0x3dcbc9b0, 0x3e800750,
0x3ea240ce, 0x3ede991a, 0x3e8f8746, 0x3f2602ef, 0x3f16672c, 0x3f3f3a56,
0x3f1a264b, 0x3f546edd, 0x3f18e677, 0x3d242890, 0x3f27d72e, 0x3f73a182,
0x3f0dffe6, 0x3e888c62, 0x3f5c4d18, 0x3f169cbe, 0x3d7b8c40, 0x3f605ca4,
0x3f44e1c3, 0x3f4c64c3, 0x3f3b6024, 0x3ec74bb4, 0x3f4fe7ec, 0x3d031120,
0x3f1a9c3a, 0x3eafddd2, 0x3e2ac25c, 0x3e9922e0, 0x3ec26036, 0x3eebbab8,
0x3f495d11, 0x3d89a9d8, 0x3f19aff9, 0x3e85fce8, 0x3f5379d2, 0x3eece884,
0x3f53cf2e, 0x3f634ee4, 0x3e1ecc48, 0x3eef20a6, 0x3ec165ac, 0x3f3a4501,
0x3effdc42, 0x3e988ff0, 0x3f51f7da, 0x3f443ee8, 0x3e49edf8, 0x3f0ee752,
0x3f60e541, 0x3dfeb8b8, 0x3e9d672a, 0x3f71a387, 0x3f49cd1f, 0x3e4b5e48,
0x3f4407de, 0x3f1dcc19, 0x3f5db90d, 0x3df4e668, 0x3f2415ac, 0x3d2036c0,
0x3f1937dc, 0x3f2e41e7, 0x3df0ea08, 0x3f207d8d, 0x3f54de5c, 0x3ebd6b04,
0x3ccb1ce0, 0x3f3e12a0, 0x3f6466e2, 0x3d02c9a0, 0x3f049180, 0x3ef4e928,
0x3f3ce2f4, 0x3ec7cccc, 0x3e43afbc, 0x3f248e42, 0x3f43918a, 0x3e2cd7d0,
0x3e5667d8, 0x3f43b581, 0x3e14715c, 0x3f560b4c, 0x3e985292, 0x3f307ecf,
0x3f1e8f76, 0x3f127f33, 0x3f4ca87d, 0x3f10d78a, 0x3f78e196, 0x3eae9436,
0x3f091df0, 0x3f47b310, 0x3d3302c0, 0x3f4c97fb, 0x3f43dbe4, 0x3eafe3f0,
0x3dc86608, 0x3f79ec4e, 0x3f00ae56, 0x3db23db8, 0x3b78b900, 0x3e750ee8,
0x3f18c0bc, 0x3ee0f508, 0x3ed1941a, 0x3f252a81, 0x3e830b0c, 0x3e2a455c,
0x3e3457a4, 0x3ece5dc0, 0x3f474bae, 0x3e79391c, 0x3e3ca940, 0x3f52c8c9,
0x3e280e9c, 0x3f4bc808, 0x3eac37ea, 0x3f171c7f, 0x3e811d68, 0x3f628233,
0x3f081aeb, 0x3e668b58, 0x3e524fb8, 0x3f060431, 0x3efe42f4, 0x3ee88ba8,
0x3e904256, 0x3e97380a, 0x3f36e5fe, 0x3f52320d, 0x3f789972, 0x3e778920,
0x3ec925de, 0x3f6b3040, 0x3f00a8c0, 0x3f048bd4, 0x3ea06fd8, 0x3ef9e334,
0x3e7ef494, 0x3f5107ea, 0x3e78e178, 0x3f3a98cd, 0x3e966bf8, 0x3ecd02ee,
0x3f700d02, 0x3f789ad5, 0x3ecbde24, 0x3f4ebdab, 0x3f1720d2, 0x3f3a08c4,
0x3f3a7fc6, 0x3e2ffae4, 0x3e0faae8, 0x3ecc09e8, 0x3f1f00a4, 0x3ec6a7d6,
0x3f762028, 0x3f4d490d, 0x3eed2a50, 0x3e0d1d84, 0x3ed2208e, 0x3e6a8dfc,
0x3e506670, 0x3ebe807e, 0x3e226ae8, 0x3ef31cae, 0x3ea68cf8, 0x3f514123,
0x3e9c3530, 0x3f744663, 0x3e9f6e0e, 0x3f34916e, 0x3ed4a298, 0x3eb41312,
0x3ec7f226, 0x3f0fd8be, 0x3f680726, 0x3d34bb20, 0x3f109d2b, 0x3f5ee98a,
0x3e5208a0, 0x3ecff9f0, 0x3f00bdd9, 0x3e64ad70, 0x3e1f3b94, 0x3f06c818,
0x3d3448f0, 0x3f5c78e3, 0x3f01590a, 0x3f3f5d23, 0x3f76c802, 0x3f4a8229,
0x3e2f6268, 0x3e5f4e80, 0x3da63b10, 0x3e218fbc, 0x3ee79b0a, 0x3ef12da4,
0x3f0b1eb3, 0x3f5d3ef2, 0x3ece0f6e, 0x3f72eaea, 0x3f609c76, 0x3ec588a2,
0x3f26821b, 0x3f34b1fb, 0x3e9d18d2, 0x3e36f0dc, 0x3eb39350, 0x3f04961a,
0x3e89e9d4, 0x3cff9ae0, 0x3d65f5a0, 0x3f66ffb4, 0x3f06a561, 0x3edb3446,
0x3ec6fc78, 0x3f3c4eb4, 0x3f4c77b3, 0x3deb7f70, 0x3f12c831, 0x3f0bad94,
0x3e3476e4, 0x3ee84aac, 0x3e89db56, 0x3f0ae963, 0x3cff0160, 0x3f0f3758,
0x3f46ce71, 0x3ebaa738, 0x3f65f39a, 0x3ef8b3fc, 0x3f16d8bf, 0x3f3a1f29,
0x3f35b3c0, 0x3e8f1cfe, 0x3e51f31c, 0x3f31af9a, 0x3ed1fbfa, 0x3f027566,
0x3f22276a, 0x3c632300, 0x3f06a19e, 0x3f11828f, 0x3f15517c, 0x3eec5686,
0x3f37849b, 0x3ecff914, 0x3f288069, 0x3f6f67a9, 0x3f1c340a, 0x3f064ff6,
0x3f358da9, 0x3f0db027, 0x3f080aae, 0x3ec92462, 0x3f4fa70a, 0x3f10f69f,
0x3ee25440, 0x3eace126, 0x3f0fd537, 0x3f72877d, 0x3eb3cf50, 0x3f76d9f9,
0x3f732b7a, 0x3e920596, 0x3ec8e89e, 0x3ebdd1e4, 0x3ea91346, 0x3f22981e,
0x3d592dc0, 0x3ee045d6, 0x3f435a29, 0x3dc14358, 0x3f4e6f74, 0x3f70eeb9,
0x3e5a1320, 0x3f0fb2e1, 0x3ef85dc6, 0x3f5e4519, 0x3f319171, 0x3f34a145,
0x3f5e7569, 0x3eb26be2, 0x3f414f54, 0x3f1a466c, 0x3f21fc0c, 0x3f582f71,
0x3f7fd18b, 0x3e99f12a, 0x3eaa84c0, 0x3f20f760, 0x3e347ea4, 0x3e1edd98,
0x3dc48998, 0x3dd5ace0, 0x3e953030, 0x3f26d6dc, 0x3e7fd7f8, 0x3f5a4b96,
0x3f387a47, 0x3f577696, 0x3f1e696f, 0x3f4d9809, 0x3df64db8, 0x3e6a57ac,
0x3f504830, 0x3aa13e00, 0x3efcfd44, 0x3f407dab, 0x3f3849a3, 0x3f09fa5d,
0x3ec16df8, 0x3f00d206, 0x3d8b01d8, 0x3f2b3041, 0x3e8363ee, 0x3f705db6,
0x3e8febdc, 0x3f097505, 0x3f58532a, 0x3f12c068, 0x3f4edfcb, 0x3ef9bfd4,
0x3ed74622, 0x3d4071a0, 0x3d135070, 0x3f0972a9, 0x3df15ad8, 0x3f4417ef,
0x3e1c6988, 0x3ea42372, 0x3f645560, 0x3f45967c, 0x3d368900, 0x3ee80cfa,
0x3e67671c, 0x3f48c9dd, 0x3dc8a3d0, 0x3f74e491, 0x3e78e600, 0x3e1e1fbc,
0x3da61ec0, 0x3e6ab84c, 0x3d9620c0, 0x3eb9f948, 0x3f5c169c, 0x3f643138,
0x3e088630, 0x3f723855, 0x3f4eff0f, 0x3d5799f0, 0x3f40e570, 0x3e62c9e4,
0x3f5817d0, 0x3e9cb156, 0x3f3dca3d, 0x3f1e4567, 0x3e19d6c0, 0x3f7ceae9,
0x3f276397, 0x3eca861a, 0x3f4f2fec, 0x3f429788, 0x3f1c976f, 0x3ed929ba,
0x3f560095, 0x3ebc78e8, 0x3e8d7156, 0x3e19952c, 0x3db0afd0, 0x3e94a56e,
0x3f051ecb, 0x3f43665f, 0x3eda474e, 0x3e2f0b2c, 0x3e043530, 0x3dab6028,
0x3e9a58d4, 0x3d966070, 0x3f09c912, 0x3dd51d78, 0x3e8a96ea, 0x3f4bcc7c,
0x3d9942f8, 0x3ebe3416, 0x3f309cd0, 0x3ef365a2, 0x3f43957d, 0x3dc081b8,
0x3ecbb09a, 0x3f0535f7, 0x3ef6717e, 0x3e5845b0, 0x3f1072e5, 0x3edbc85a,
0x3f71f39b, 0x3f30676e, 0x3c126b40, 0x3f27b5f7, 0x3ec6872e, 0x3eedc058,
0x3f5469a4, 0x39b64000, 0x3ebce648, 0x3f3ea5e5, 0x3f283526, 0x3f32f4cc,
0x3d0e8b50, 0x3e43fe94, 0x3f579a71, 0x3e8984e6, 0x3f18a322, 0x3e940f60,
0x3d95f7e8, 0x3e7073c4, 0x3ef1eac6, 0x3e40a1d0, 0x3f47cb81, 0x3f1fa1ac,
0x3f1579ce, 0x3f741f69, 0x3f017077, 0x3d032bd0, 0x3f33b776, 0x3e0b9120,
};
// 4,6,6,7
uint32_t kernel_vals[] = {
0xbd4a03c3, 0xbc5236e0, 0xbcae018c, 0x3bbe1100, 0xbd5142e3, 0x3d87edd1,
0xbd47b618, 0x3b769320, 0x3d9ab045, 0xbd05cbfe, 0x3d61fe4a, 0x3c25a5c8,
0xbd900d38, 0x3d62a71e, 0x3d9c1deb, 0xbcdfe380, 0xbcfb6226, 0x3d733b32,
0xbccdcb56, 0xbd922153, 0x3d6b1ada, 0xbd6120bb, 0x3c675e68, 0x3d9ab0b5,
0xbd665cdb, 0xbd954bc3, 0xbd8b4e1e, 0xbc8efd34, 0x3d761bda, 0x3cc9bd6c,
0xbb01d160, 0xbd5238bb, 0xbd73ef38, 0xbb98c350, 0x3c010f40, 0x3b2d32a0,
0xbc32d1c8, 0xbc4765c8, 0xbc0e9040, 0x3da601a9, 0x3d9fec2b, 0x3da50ef9,
0x3cfd2260, 0x3d75a5aa, 0xbb2958e0, 0xbcf45a30, 0x3d5b947a, 0x3b4502a0,
0xbd87a9ac, 0xbd30ab90, 0xbd7e999b, 0xbcdc1fe0, 0x3cf19a1c, 0x3cf83e74,
0xbd6ce22e, 0x3c9412e0, 0xbd6314f8, 0xbd8989b6, 0xbb206ee0, 0xb94f9800,
0xbb4cfd20, 0x3da94535, 0xbd91357b, 0xbd0a68a0, 0xbd84500b, 0xbda98373,
0xbd90c926, 0x3cce5310, 0xbd666610, 0x3cf01390, 0xbce33ad6, 0xbaae3e40,
0xbc9744a0, 0x3c36b518, 0xbc9bbcc4, 0xbd87c6ca, 0xbd829a38, 0xbd1270c6,
0xbd2f3168, 0xbdaa5313, 0x3da73187, 0x3bd83540, 0xbd7bb888, 0xbd9c2266,
0x3d385dba, 0xbd7461cb, 0xbd7f31ab, 0x3d800dc7, 0xbcbc873c, 0xbd667fb3,
0xbd9abfd4, 0x3d22eaa6, 0x3c806804, 0xbcb97cc0, 0xbc88b354, 0xbda676d8,
0xbd4ed83b, 0xbd1a0aae, 0xbd7773c8, 0x3bf2a210, 0x3ce93214, 0x3d9452fb,
0x3c9b2c44, 0x3c990d8c, 0xbd964a23, 0x3d79671a, 0xbc6d6958, 0xbd69e633,
0xbd138708, 0xbbc895d0, 0x3d2790f6, 0x3c61ca38, 0xbb4f0e20, 0xbd940883,
0x3d7bfc2a, 0x3d6fdb12, 0x3d315be2, 0xbd2d4c3b, 0xbd0c25c0, 0x3d72df32,
0x3c917af4, 0xbd8c49b7, 0xbd0b3336, 0xbd59673e, 0x3d84b6f9, 0x3d6b8ebe,
0xbc50ffa0, 0x3da06631, 0x3c677ab8, 0x3da59d41, 0xbda8ae8a, 0x3daa0c4b,
0x3c5d9af8, 0xbd87d57a, 0xbd2fb7e3, 0xbd5eb736, 0x3cc0fb3c, 0x3bec2f80,
0x3d62a892, 0x3d01ef10, 0x3d5d3c0a, 0x3cacdaa0, 0xbcb23880, 0x3d6bfc32,
0xbd8c4892, 0xbcc77b7c, 0xbd366458, 0xbc0f6448, 0x3b6d7660, 0x3d259518,
0x3d917fed, 0x3c778448, 0xbd9d1dc6, 0x3daa79e5, 0x3c574c60, 0x3d8fcc0b,
0xbd8a1148, 0x3ae3dc00, 0x3b02cd80, 0x3ca7b88c, 0xbce928d6, 0x3d6000fe,
0x3d7cd93a, 0xbd0491f3, 0x3d939069, 0x3da781f5, 0x3cab431c, 0x3d707002,
0xbd09fcfe, 0x3cf1dfc4, 0xbcc6e010, 0x3d9b0a09, 0xbc938a64, 0x3d934c85,
0x3d044c48, 0x3d8bccb1, 0xbd7820d0, 0xbcd881ac, 0xbd0c0308, 0xbda88d38,
0xbd1c52db, 0xbd090ae8, 0x3d478712, 0xbd3add30, 0x3d6eecaa, 0x3d436ff2,
0x3c0c4e38, 0x3d57f8ba, 0x3d941841, 0x3d893f19, 0xbcbbe2bc, 0x3d5f09d6,
0xbd7a70a8, 0xbc3c97c8, 0x3c817dc4, 0x3d4588a2, 0x3d522efa, 0xbd9de178,
0xbc1b6868, 0x3ca2b0ac, 0x3da67919, 0xbd61e47b, 0x3d30937a, 0x3d20a766,
0x3d83eabd, 0xbda09743, 0x3d1bcf6e, 0xbd0e940e, 0x3c1630e8, 0xbd95ac72,
0xbcbe5580, 0xbd850cff, 0xbd0121cb, 0xbda5dcd3, 0x3c597e38, 0xbd048f08,
0x3d7364e6, 0xbc5b3fe0, 0xbb9ecb50, 0xbcc07970, 0x3d495e42, 0xbd7244a8,
0xbba458c0, 0xbd2938bb, 0xbc857b8c, 0x3d7c964a, 0xbc4a2920, 0x3c4ee508,
0xbd8f9162, 0x3d6936f6, 0xbcf24430, 0x3c83eb40, 0x3c69dee0, 0xba93cac0,
0xbd195598, 0x3d18eb9a, 0xbd646000, 0xbc436438, 0xbd198366, 0xbca107f4,
0x3d87a943, 0xbd06fdc0, 0x3cb5c450, 0x3d386062, 0xbd2c5af0, 0xbbcde190,
0x3da093c1, 0x3d3340ca, 0x3d7b3e6a, 0xbd884b9b, 0x3cc6e034, 0xbc27b048,
0x3d685ba2, 0x3d47e7fa, 0xbd7c1920, 0x3d020fa8, 0x3cfede94, 0x3b942550,
0x3d9e15cd, 0xbd87bafb, 0x3d1b78de, 0xbcc421a0, 0x3d2fbc62, 0xbc84d870,
0x3d9fba41, 0x3d9d1cc9, 0x3d57c20e, 0xbc382738, 0x3d9a5495, 0xbcce81d6,
0x3c6d1f18, 0x3cdc9670, 0x3da20251, 0xbda7c0eb, 0xbda8c18b, 0x3d23aef0,
0xbd44bb1e, 0xbc7d4a58, 0xbc1d0c88, 0x3c8b2db4, 0xbc613720, 0x3d988a2d,
0x3d9f800d, 0xbda3b07e, 0xbd560d4b, 0x3d84a039, 0xbd485743, 0x3c8ffad4,
0x3ca4b1f4, 0xbd076a66, 0x3d9fe6cf, 0x3d1e1f82, 0xbd73ba73, 0xbd4d1ea3,
0xbd1b65a8, 0x3cce6e20, 0x3d5626ce, 0x3d97d1af, 0xbd827e7a, 0xbd3a5a08,
0x3ccbcf1c, 0x3cb14adc, 0x3d00c4e0, 0xbd824f02, 0xbcab27c6, 0x3d8a989b,
0xbd982f57, 0xbd6df280, 0xbc9ad2bc, 0x3d8adfa9, 0x3d1af2f8, 0xbcb937e6,
0xbd849250, 0xbd869830, 0xbb81f4b0, 0x3d80eaed, 0x3d659d6e, 0xbd6d9283,
0xbd911c08, 0xb9dd0900, 0xbcddf680, 0x3d875dd1, 0xbce7e3e6, 0xbd261250,
0x3d638eca, 0x3ce98ecc, 0xbd7a0fd6, 0x3d03a7fa, 0x3cdbc19c, 0xbd83005c,
0x3ccd6be4, 0x3d743cca, 0xbcb62090, 0x3d8e6675, 0xbd069c18, 0xbd9910cc,
0xbd40193b, 0xbc57c220, 0x3da191a9, 0xbd44f610, 0xbd30923b, 0x3ccfcfd0,
0x3d99dd31, 0xbda5ff74, 0xbd6cf72e, 0xbc9be3c4, 0xbc9468f4, 0x3d8ea223,
0x3d2b00c2, 0x3da1cfc5, 0x3d769002, 0xbc0ba500, 0x3ca23384, 0xbd455806,
0xbda0bce0, 0x3c75e600, 0xbb8ed8d0, 0xbd3820d3, 0xbd59c020, 0x3d3931da,
0xbd6e0228, 0xbcfc3f16, 0x3d424db2, 0x3ce49904, 0x3cb2f314, 0xbd09cc83,
0x3da8afe5, 0x3c0677c8, 0xbd24a163, 0x3d370cd2, 0xbd925bee, 0x3cb6c29c,
0x3c2a68c8, 0xbd340246, 0xbc050168, 0xbd216376, 0x3d340a32, 0xbd2faa58,
0xbd83fb53, 0xbd78bfde, 0x3da08a5d, 0xbd36e5f0, 0xbb33c3e0, 0x3d3c6dbe,
0xbc6b0880, 0x3d946a69, 0xbd733f56, 0x3d8743e3, 0xbda8f65f, 0x3d66b4d6,
0xbc0493f8, 0xbcb2bde6, 0xbd5d4a2b, 0xbd09095b, 0xbb6b1e20, 0x3d8f511d,
0x3d08ee3e, 0xbd1fc2ee, 0x3b389ba0, 0x3d5b997a, 0xbd4845c8, 0x3cc9af00,
0x3c9d8610, 0x3d004b8e, 0x3c2ed268, 0x3da3da01, 0x3b3a9a20, 0xbcfd49f6,
0xbd56ae76, 0xbd03c61b, 0xbd5726ee, 0xbd5a2928, 0x3d55484a, 0x3c9a634c,
0xbd8770c6, 0x3d6d1f0a, 0x3d33e832, 0x3d694b3e, 0xbcc187fc, 0xbd242a76,
0xbd13130e, 0x3da72273, 0x3d92b731, 0xbd4aebb6, 0xba1b6580, 0x3d72c8e6,
0x3d4925e2, 0x3d236c5e, 0x3da15fff, 0xbaedc400, 0x3d4c3222, 0x3cbe0dac,
0x3b0f3d80, 0xbcdc1c6c, 0xbc492478, 0xbb4e9ce0, 0xbc4403a0, 0xbca3c834,
0xbd9505ff, 0xbd40e9c8, 0xbcafff9c, 0xbc7b90e0, 0x3cc9f06c, 0xbd4b6eab,
0x3d7358ea, 0x3cbcc304, 0xbd7b946b, 0x3da83db9, 0x3da7ba81, 0x3d7273aa,
0x3c9fb3f4, 0x3d35f28a, 0x3cb64c2c, 0x3d13ae6a, 0xbb8e24c0, 0xbd0a239b,
0xbcac75bc, 0x3d37fdce, 0x3c8e076c, 0xbd19c92e, 0xbd8af5ca, 0xbce7a0e6,
0xbd9bef43, 0xbd71ac18, 0xbc20bd68, 0xbd8713cb, 0xbd0ba7f6, 0x3d9bdc03,
0xbd748a23, 0x3cf9f78c, 0x3d1957ce, 0xbcb48d90, 0x3d87a11d, 0xbd368300,
0x3d71779a, 0x3d9f7779, 0x3d13d07e, 0x3d78d2a2, 0xbb27f100, 0xbc85c52c,
0x3d71496a, 0x3d9dd073, 0xbd64cd66, 0xbd0dec4b, 0x3d5366f6, 0xbd140f3e,
0xbccf4696, 0x3d890e07, 0xbd5fc223, 0xbd48bb08, 0xbb1e2120, 0xbcf35ff0,
0xbb3fa780, 0x3d2367c0, 0xbcb8fd1c, 0x3c4230a0, 0x3c28b8e0, 0xbd843ad8,
0x3d667bb2, 0x3d6ba9ba, 0xbd31a7c8, 0xbc91a29c, 0x3d1f525e, 0x3d56d02a,
0x3c58f918, 0xbd4e7748, 0xbd069380, 0xbd868d93, 0x3c189918, 0x3c4db988,
0xbd597f13, 0xbd66b160, 0xbc9c6190, 0x3c6cd880, 0x3beff080, 0xbd4fd32b,
0xbda0b9f6, 0x3c616068, 0xbca808ac, 0x3c9ac354, 0xbd344e9b, 0xbb158160,
0x3b365c60, 0xbd3e3f5e, 0xbc0d2708, 0x3d62fcc6, 0x3cda7c50, 0x3d153bc0,
0x3d158ff8, 0x3d4ebf7a, 0xbbdae480, 0xbd6a50c6, 0xbd7e4803, 0xbc7792e8,
0xbd12d0de, 0x3c343060, 0xbd7420e8, 0xbc5a75f8, 0x3ce2c7c0, 0xba3b0480,
0x3ccd7200, 0x3c3f2c08, 0xbd46fff0, 0x3d4a83aa, 0xbd11f5d6, 0x3d3bee22,
0x3d36d2b2, 0x3a294080, 0xbbed67d0, 0xbd8575c0, 0x3ccbd3dc, 0xbd7b58d6,
0x3cfc2f3c, 0x3d10d15e, 0x3d9eac33, 0x3d63527e, 0xbd95759f, 0xbd817ba4,
0x3d050c4e, 0x3d93373d, 0x3da49995, 0x3d90861d, 0x3d55d23a, 0x3d3b2d0a,
0x3d9d3063, 0xbd723016, 0x3cc20d70, 0xb9c62100, 0xbd859a97, 0xbd0d1698,
0xbda20baa, 0x3c74b958, 0x3d11e536, 0x3d9a2b91, 0x3d35b086, 0x3d7029e2,
0xbd8368e6, 0x3d2295c8, 0x3d5e2072, 0xbc1980a8, 0xbd01fb43, 0x3d507efa,
0x3d84132b, 0xbd69cbb8, 0xbd124ef8, 0x3d05e09a, 0xbd583096, 0xbd25dcb8,
0xbc9b34f0, 0xbc012fe0, 0x3cf104c4, 0xbc939eec, 0xbd92f58e, 0xbda36403,
0x3d4ac81a, 0xbd9225a3, 0xbd3a51c0, 0x3d98faeb, 0x3d6f850e, 0x3d4396c6,
0x3d2b0e4a, 0x3da2cb89, 0x3d77ff42, 0x3d680d82, 0xb7820000, 0x3d04fd7a,
0xbc040aa0, 0xbcd8f0f6, 0x3cf44ea0, 0xbd972e2a, 0x3da9c73b, 0xbd958eda,
0xbc3de068, 0xbbf73cf0, 0xbcaed310, 0xbd9136ac, 0x3d3f64ca, 0xbd939ec8,
0xbd5295ab, 0xbcf5ca66, 0xbd974058, 0xbd3b6956, 0x3c663100, 0xbd4bc706,
0x3cd65f44, 0xbc8aa470, 0xbd77e1fb, 0x3d827151, 0x3d12755e, 0xbd62511e,
0x3cc7c950, 0xbd526e3e, 0xbd26b046, 0xbd03d88b, 0xbdaaa8ee, 0x3c87e5bc,
0x3c4d17b8, 0x3ab2c4c0, 0xbb761120, 0x3d96b75d, 0xbd8fadcf, 0xbc7c0238,
0xbc6dd6a0, 0x3c67ac88, 0xbd686143, 0xbd8ab51f, 0xbd2b4853, 0x3d4d83ca,
0x3d0652e6, 0x3da86b55, 0xba274280, 0x3d9f8853, 0x3d1f1aca, 0xbd1ff04e,
0xbd87a7e4, 0xbbeb7310, 0xbd0e4be0, 0x3d82c1f5, 0x3b93f100, 0xbc48fda0,
0xbca725a0, 0x3cc52edc, 0xbd0d028b, 0x3c064460, 0x3d108328, 0x3d445006,
0x3ca2ebcc, 0x3cb4c6a0, 0xbd918164, 0xbce079b0, 0x3d468be6, 0x3bcf5b30,
0x3c94a440, 0x3d57dd62, 0xbda84590, 0x3d4c6202, 0x3c5a8a08, 0x3d1c1212,
0x3cb9ae74, 0x3d50fd96, 0x3d0b3670, 0xbd3f0786, 0x3d88c839, 0xbceb9f90,
0x3c353208, 0xbd91131b, 0x3d61f992, 0x3d596c52, 0xbcb54dc0, 0xbd5eb8ce,
0x3bf4c430, 0xbce49680, 0x3c96c7d0, 0x3da53077, 0x3d8b11d5, 0x3d007956,
0xbd4240ce, 0xbd2dd5b0, 0xbd91414f, 0xbd9d1547, 0xba83de00, 0x3d2867f2,
0x3d893173, 0x3d8460a1, 0x3d2e6bfa, 0xbd9f0580, 0x3d808d1d, 0xbce392b6,
0xbbcea8c0, 0xbd4bbc6b, 0xbc0c4918, 0x3d4201fa, 0x3d42c78e, 0xbd1ed16e,
0x3d447092, 0xbcb6e20c, 0xbca1eb10, 0x3c9982f0, 0x3c174de8, 0xbd4b2130,
0x3d64072a, 0x3c003838, 0xbd98af67, 0x3cde6524, 0x3d9eac01, 0x3d3c4262,
0x3d708e6a, 0xbd82ab1f, 0xbd6b1bf6, 0xbd1b98a0, 0xbcfccf66, 0xbd99004a,
0x3d9f1509, 0xbd851e14, 0x3d8a5981, 0xbda1edb8, 0xbd828fc6, 0x3d17dfd6,
0x3d3fb852, 0x3d85bafd, 0x3cbb2a14, 0xbc00aed8, 0x3d563aee, 0xbccda146,
0x3daa4fc9, 0x3d009e36, 0x3d043db6, 0xbd59b186, 0xba87d740, 0xbcf34820,
0xbd601c9b, 0x3d879839, 0xbc78e9e0, 0x3d522172, 0x3d843c15, 0xbcf89fd0,
0x3d9de679, 0x3c76e580, 0x3da022d9, 0xbb2b0960, 0x3d8d958d, 0x3bb72830,
0x3d47e44e, 0xbd861bcb, 0x3d841fbd, 0xbcb14396, 0xbd9e5a1f, 0xbd50064e,
0xbd65881e, 0xbc2c1178, 0x3c9935f4, 0x3cd26b14, 0xbd964b93, 0x3d0ece78,
0xbd3823e8, 0xbd390750, 0x3cc02404, 0x3ce14fc4, 0xbd468230, 0x3c71fc00,
0xbbcb8d10, 0xbd65f760, 0x3d6b7c6e, 0x3cf6fde4, 0x3b848080, 0x3d8c8471,
0x3d79d7da, 0x3bc5f110, 0x3d475ae6, 0x3cc6fd24, 0x3d216f98, 0x3d97f671,
0x3cc07234, 0xbca16f40, 0x3d30e80a, 0xbd0c4656, 0x3d071596, 0x3c7ee198,
0x3bd44630, 0xbc975adc, 0x3cf64664, 0xbd98952e, 0xbd658dce, 0xbd94b083,
0x3d29ce46, 0xbc531258, 0x3d8a636b, 0xbc40ea08, 0x3da40a5d, 0xbd13582b,
0x3d8f2b9d, 0xbd16a40e, 0xbda1e36a, 0xbd6b0edb, 0xbd3335bb, 0x3adacdc0,
0x3bdebd00, 0xbd45d8b8, 0xbd6f4a10, 0xbc522c38, 0xbd65d418, 0xbda75ad6,
0xbc027a68, 0x3da13ba5, 0x398f4500, 0xbd855b0c, 0xbaae74c0, 0x3c77ccf8,
0x3cd991f0, 0x3c98cfcc, 0x3c67cfc0, 0x3c92a474, 0xbda73f5a, 0xbd47c613,
0xbc87a984, 0xbd98ea20, 0xbd688dd6, 0xbd9e1cc8, 0xbd63b108, 0xbd42dc90,
0xbd31977b, 0xbcc06b16, 0x3d748c1a, 0x3d081cd2, 0x3b634420, 0x3ce49d64,
0x3cc3b2dc, 0xbd987c76, 0xbd08e1a6, 0xbc03aa00, 0xbda3ae33, 0x3c9f9694,
0xbd6b8b8b, 0x3c19d538, 0x3c7765e0, 0x3d0880ce, 0xbd1a8f38, 0x3d3f94a2,
0xbd72580e, 0x3d047fe6, 0xbb229f80, 0xbd289976, 0x3d882ef5, 0x3d83c213,
0xbbffd530, 0xbd12bb66, 0x3b79b6a0, 0x3d320592, 0x3d41b30e, 0x3d0dfdb6,
0xbb816370, 0xbc16cf78, 0xbd96a58f, 0xbbcbc7c0, 0x3cd957bc, 0x3d8f1b19,
0xbd3e4b03, 0x3da8f2e7, 0xbd2e5dd3, 0x3be3d140, 0x3d1a5f92, 0x3cc65100,
0xbcb5ecd6, 0xbd98aac0, 0x3d8f6871, 0x3d17c096, 0xbb20c5a0, 0x3ce1e18c,
0xbd8fc1c0, 0xbbfcf430, 0x3ae3df00, 0xbd092c23, 0xbcece9ec, 0x3d30a792,
0x3d31f3f6, 0x3d9680b1, 0x3d06d1da, 0xbc4dfd68, 0x3d9dd547, 0x3d85ae15,
0x3d205320, 0x3d14a018, 0xbc8ad6fc, 0xbd42672e, 0xbd7779de, 0x3da53843,
0xbd69a3d8, 0xbd3bbe16, 0xbd7468a6, 0x3cf76de0, 0x3d3af472, 0xbbc2b040,
0xbd471f58, 0xbd9682bb, 0x3cb26d40, 0xbda47dee, 0x3d47fdba, 0x3d66c7e2,
0xbcfb69ac, 0xbd9d1c80, 0xbc4a0e68, 0x3cf65330, 0xbd1b30ab, 0x3cd110a0,
0xbc572bd8, 0x3d8fe485, 0x3abb1f40, 0x3d8baaed, 0x3c785ca8, 0xbda6b760,
0x3c642688, 0xbd9156a0, 0xbb8e1b90, 0xbd393ae0, 0xbcba77bc, 0xbda80d80,
0x3d89eb6d, 0xbcd71d4c, 0x3d5b3d2e, 0x3da096c1, 0x3d977241, 0xbc756640,
0x3da514a5, 0x3d3e230a, 0x3ba671b0, 0xbda375c7, 0x3d8f01c3, 0xbd5e89ee,
0x3d9913a5, 0x3ac94200, 0xbd46aec3, 0xbd85060f, 0x3a4ca080, 0x3d3901f2,
0xbd2cc0d6, 0xbd38388b, 0xbd977c72, 0xbd68695e, 0xbd356b80, 0x3c396248,
0x3d7d26e6, 0x3d9f7b59, 0x3d5170ce, 0xbd8fc497, 0x3b8a3e50, 0xbd60a916,
0xbcf97180, 0x3ceb3d34, 0x3c90038c, 0xbd905acf, 0xbd447fa0, 0xbcb5506c,
0x3ca3926c, 0x3ce23a2c, 0x3cc87fb0, 0xbc6f4800, 0x3cf6c04c, 0x3ceff3dc,
0x3cdb344c, 0xbd747cd6, 0xbca6ca9c, 0x3da86511, 0x3d35aa3a, 0xbda53be8,
0xbd4e883e, 0x3b353780, 0xbd67b886, 0xbd8006bf, 0x3a1be600, 0xbd16e450,
0xbda47564, 0xbcaebbc0, 0xbd900413, 0xbd9261d2, 0x3d4f1bba, 0xbc26e7c0,
0xbd4c509b, 0x3d925d1d, 0xbda9f75b, 0xbd958d2b, 0x3d8b3fb7, 0xbd0bd06b,
0xbd08bf1b, 0x3d904c03, 0xbd321c06, 0xbd90b0ff, 0x3d5b7eaa, 0xbc8051d0,
};
// 7
uint32_t bias_vals[] = {
0x3d7895da, 0x3d66c0c2, 0x3da8f295, 0x3c8090d0,
0x3b0969a0, 0x3da4fbad, 0xbd55808e,
};
// 4,5,3,7
uint32_t output_exp_vals[] = {
0x3e800009, 0x3e5cc2bb, 0x3dd8b7fc, 0xbd6f5e64, 0xbf0049b7, 0x3db2113b,
0x3c2e3283, 0xbea7f82c, 0x3e190482, 0x3e964a40, 0xbdeea57b, 0xbe488eb1,
0x3dbd218d, 0xbd843b27, 0x3c353168, 0x3dadf9bd, 0xbe95f953, 0xbe2ab356,
0xbe95d365, 0xbe939e65, 0x3e89892d, 0x3e25c7d0, 0x3e9b0553, 0x3e39f0f1,
0x3eca599b, 0xbe152c42, 0x3ea822df, 0xbe339202, 0x3e6d580b, 0x3e9794f4,
0x3dde4c3a, 0x3d8b0621, 0xbf1a21a5, 0x3dfc1fe5, 0x3e49eed5, 0x3e5ebc15,
0x3d19c5a0, 0x3d8d4f3d, 0xbea808ac, 0xbecf76da, 0xbee54599, 0x3ea8e7bd,
0x3d4c47a1, 0xbcd8a9d4, 0x3e1d24d0, 0xbcd58c40, 0xbee6366e, 0x3e88acad,
0xbbb28961, 0xbea28a89, 0x3ea61d80, 0x3ed57081, 0xbdf12a11, 0xbf1070d0,
0xbeb9ecba, 0x3d948c77, 0xbe1630c7, 0xbd865d9a, 0x3d694f50, 0x3d618060,
0xbe90c00a, 0xbc7090db, 0x3ec9335e, 0x3d69cdbb, 0x3d960288, 0x3edc27a8,
0x3e078210, 0xbebb55cf, 0x3ddc602d, 0xbde874ef, 0x3db7ea35, 0x3e592130,
0x3e2a8154, 0xbe215282, 0xbf16bf05, 0xbdadf37e, 0x3daf312c, 0xbe3b1d5e,
0xbdf7a1ce, 0xbdbffeb8, 0xbe1678e4, 0xbe663303, 0xbe360a72, 0xbe2acad4,
0x3e436c58, 0x3da03a17, 0xbcd28ce2, 0x3e86bb89, 0xbd8a93ca, 0x3e1ccc14,
0xbcc44d16, 0x3e75ece0, 0x3e21a281, 0x3e9e283c, 0xbdc6f8fb, 0xbdd9e63e,
0xbe41d862, 0x3e958b70, 0x3dd38536, 0x3d36528a, 0xbdae929f, 0xbce4e6a2,
0xbd965d20, 0xbe1e6fc7, 0x3e596745, 0x3e6acaab, 0x3e0c2af5, 0x3ec92511,
0x3d9b3198, 0xbe7669a4, 0x3e6d9559, 0xbe238994, 0xbd6f15a0, 0x3ecbbbb7,
0x3dea562a, 0xbdcd3380, 0xbef6648c, 0xbe05855d, 0xbd10f5eb, 0x3ee80009,
0x3db3b7f8, 0x3de7f23f, 0xbe15d9b3, 0xbe03a005, 0xbdbd7cce, 0x3e17e7f6,
0x3e050054, 0x3dfdf63a, 0x3f0b55cd, 0x3dde99dd, 0xbe93ac48, 0x3d927932,
0xbc006933, 0x3e995281, 0x3e9fa656, 0x3e0838d8, 0xbdb96f3f, 0xbe628743,
0xbe9d09ac, 0xbcc5c04d, 0x3e2b9311, 0x3e544b71, 0xbe0b460d, 0xbeba0990,
0xbe86fe5a, 0x3d86e498, 0x3e4f89b7, 0x3e85155f, 0x3e70834f, 0x3edb8026,
0x3e106b80, 0xbeb7ecce, 0x3e6d4fd1, 0x3dcee4a3, 0x3e2cec70, 0x3e652ac0,
0x3e3d0220, 0x3e802fb4, 0xbe94e2d7, 0xbe1a902e, 0x3dcf41ab, 0xbda36d77,
0xbdd779ec, 0x3d22e8e3, 0xbe17c8d2, 0xbed6b5ab, 0xbd1cefdb, 0x3e7ebc8d,
0x3cb6a703, 0x3dc6f159, 0x3e939f4c, 0x3cc6d6ae, 0xbea4d8c8, 0x3e5d0622,
0xbd23eacd, 0xbda939c9, 0x3e1b1995, 0x3e2f815b, 0xbeac3cd3, 0xbf15d0da,
0xbf03af7c, 0x3bc4a6d9, 0x3e8cf965, 0x3d1a27a2, 0x3eeaaa98, 0xbd7bdc52,
0xbd4fb68e, 0xbe4063ea, 0x3d8d0cc5, 0x3e916e54, 0xbac6376b, 0x3dd4732e,
0x3e7bfcc1, 0xbe81430c, 0x3e77d1b0, 0x3d80743a, 0x3c75e0f7, 0x3dc65b26,
0x3cb872dc, 0x3d429fa2, 0xbe005912, 0xbe103baf, 0x3de03a8c, 0x3e851e20,
0xbe285826, 0x3dbf8d53, 0xbe559c2a, 0xbe5b362e, 0x3d2ee6a4, 0x3e3f6f18,
0x3cc667e5, 0x3be227ae, 0x3d016ffc, 0x3e20fbce, 0xbe9fe558, 0x3e10e7f8,
0x3e4824d5, 0xbda27412, 0x3eb27828, 0x3e755759, 0xbe7c3e18, 0xbf2b8387,
0xbe8a3ca3, 0xbde6d540, 0x3d44254c, 0x3c9c5216, 0xbdf7ec4c, 0xbe3e7179,
0xbe8b1c89, 0xbe2ae027, 0xb94c3ecd, 0x3dee7547, 0xbd6cafec, 0x3e909c65,
0xbe25799b, 0xbe957a87, 0x3dd1f2cf, 0x3d4fffba, 0x3e9ad1f3, 0x3ecf9869,
0x3e851b77, 0xbef21b3c, 0xbf309a66, 0xbe9694e1, 0xbdd16fef, 0x3db6f5d4,
0x3cd36c12, 0x3d88ba04, 0xbe8e4ee1, 0xbeac0437, 0xbde01afc, 0x3ed26389,
0x3dfbaafb, 0x3b38fc61, 0x3e97a1f5, 0x3d6752c6, 0xbf07fb08, 0x3dcd47db,
0xbe56aea5, 0x3e02e71c, 0x3f02f762, 0x3e4dc44e, 0xbd755279, 0xbedfb32f,
0xbe522fa0, 0xbc4d388e, 0x3cecdfd6, 0x3da0a80e, 0x3bbac653, 0xbd82ef3d,
0xbe60044e, 0xbe9ca11d, 0x3e631998, 0x3d3bb59f, 0x3e71dfde, 0x3d2286e5,
0x3e98798d, 0xbf09b153, 0x3d6a9f74, 0x3e11df9c, 0x3e5de0f9, 0x3ea99bb4,
0x3e43c72e, 0xbddb4395, 0xbe69c536, 0xbc0e0570, 0x3ea5b6ca, 0xbcf35b4f,
0xbb791864, 0xbe413177, 0xbea6b9f5, 0xbddddcef, 0xbd76b1a3, 0xbc547e22,
0x3e4ee61d, 0xbe2bf763, 0x3de683ed, 0x3dbff523, 0xbe3847ed, 0x3e775bb5,
0xbd0b4a08, 0x3e59e88a, 0x3e26091d, 0xbdebc80d, 0x3dae7906, 0xbe19f53b,
0xbd8380c1, 0xbc766243, 0x3d2245bc, 0x3c994d8d, 0x3e4c9d2a, 0x3e0e9a3a,
0xbe13408d, 0x3da080bc, 0x3b439034, 0x3d52f123, 0x3dbe7f95, 0x3eeebcbb,
0x3ea507c6, 0xbed81e08, 0x3e9538e6, 0x3d3f8f59, 0x3e91f5d3, 0x3e9249cf,
0x3e83c73c, 0x3de597cb, 0xbee560c4, 0xbe1e8918, 0x3ca144b6, 0x3d726a89,
0x3d846794, 0x3dd78c67, 0xbea7732f, 0xbe7bde9e, 0xbe6706c7, 0xbdf711c2,
0x3e4bd329, 0x3d2304df, 0x3eabc3f0, 0x3d45e53a, 0xbedcc3e9, 0x3e8446f9,
0xbe3fd995, 0x3e547af2, 0x3f0aa842, 0x3ec9a325, 0xbed2a254, 0xbf0f6b26,
0xbde25b3f, 0x3dc8c105, 0x3d8ec9d0, 0x3d7fd7a1, 0x3e610f6a, 0xbdea6665,
0xbead5c1f, 0xbe80e77d, 0x3e489cb7, 0x3ec65a7c, 0x3e4ffac6, 0x3d55f9a6,
0x3e9ea98b, 0xbf0e625e, 0xbc8bde61, 0x3dd68ecb, 0x3bc64cbd, 0x3e86a09b,
0x3d554cb4, 0xbeccd628, 0xbf035dfc, 0xbe6c27ea, 0x3d4483cf, 0x3e223af9,
0xbe20cbee, 0x3dd718e7, 0xbe8fad2f, 0xbe938918, 0xbeb29804, 0x3e9142f0,
0x3de3e136, 0x3e9a39ce, 0x3efa0610, 0x3d9406ea, 0xbee24e0f, 0x3e38404b,
0xbe3ef7ee, 0xbc59ad65, 0x3e5f9e81, 0x3e23b360, 0xbea0121f, 0xbf0bfbbc,
0xbec79dfc, 0xbd7102a6, 0x3eb5585c, 0x3dca0b72, 0x3cb1a8b7, 0xbe1a22e0,
0xbe477474, 0xbe2e323b, 0x3e06e740, 0x3e6ec883, 0xbd822e0f, 0x3e84a3f8,
0x3dbc54c1, 0xbe2a66fe, 0x3e5fe31b, 0x3d6cdf21, 0x3eed45e5, 0x3e49ef86,
0x3e042067, 0xbd2b9ef1, 0x3d8982d5, 0x3cd070fb, 0x3ededc87, 0x3e0a8bfa,
0xbb858c8b, 0x3d04acd8, 0xbe08ce60, 0xbe30cc81, 0x3d582e3d, 0x3db17c5b,
};
// 4,5,3,7
uint32_t output_relu_exp_vals[] = {
0x3e800009, 0x3e5cc2bb, 0x3dd8b7fc, 0x0, 0x0, 0x3db2113b,
0x3c2e3283, 0x0, 0x3e190482, 0x3e964a40, 0x0, 0x0,
0x3dbd218d, 0x0, 0x3c353168, 0x3dadf9bd, 0x0, 0x0,
0x0, 0x0, 0x3e89892d, 0x3e25c7d0, 0x3e9b0553, 0x3e39f0f1,
0x3eca599b, 0x0, 0x3ea822df, 0x0, 0x3e6d580b, 0x3e9794f4,
0x3dde4c3a, 0x3d8b0621, 0x0, 0x3dfc1fe5, 0x3e49eed5, 0x3e5ebc15,
0x3d19c5a0, 0x3d8d4f3d, 0x0, 0x0, 0x0, 0x3ea8e7bd,
0x3d4c47a1, 0x0, 0x3e1d24d0, 0x0, 0x0, 0x3e88acad,
0x0, 0x0, 0x3ea61d80, 0x3ed57081, 0x0, 0x0,
0x0, 0x3d948c77, 0x0, 0x0, 0x3d694f50, 0x3d618060,
0x0, 0x0, 0x3ec9335e, 0x3d69cdbb, 0x3d960288, 0x3edc27a8,
0x3e078210, 0x0, 0x3ddc602d, 0x0, 0x3db7ea35, 0x3e592130,
0x3e2a8154, 0x0, 0x0, 0x0, 0x3daf312c, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x3e436c58, 0x3da03a17, 0x0, 0x3e86bb89, 0x0, 0x3e1ccc14,
0x0, 0x3e75ece0, 0x3e21a281, 0x3e9e283c, 0x0, 0x0,
0x0, 0x3e958b70, 0x3dd38536, 0x3d36528a, 0x0, 0x0,
0x0, 0x0, 0x3e596745, 0x3e6acaab, 0x3e0c2af5, 0x3ec92511,
0x3d9b3198, 0x0, 0x3e6d9559, 0x0, 0x0, 0x3ecbbbb7,
0x3dea562a, 0x0, 0x0, 0x0, 0x0, 0x3ee80009,
0x3db3b7f8, 0x3de7f23f, 0x0, 0x0, 0x0, 0x3e17e7f6,
0x3e050054, 0x3dfdf63a, 0x3f0b55cd, 0x3dde99dd, 0x0, 0x3d927932,
0x0, 0x3e995281, 0x3e9fa656, 0x3e0838d8, 0x0, 0x0,
0x0, 0x0, 0x3e2b9311, 0x3e544b71, 0x0, 0x0,
0x0, 0x3d86e498, 0x3e4f89b7, 0x3e85155f, 0x3e70834f, 0x3edb8026,
0x3e106b80, 0x0, 0x3e6d4fd1, 0x3dcee4a3, 0x3e2cec70, 0x3e652ac0,
0x3e3d0220, 0x3e802fb4, 0x0, 0x0, 0x3dcf41ab, 0x0,
0x0, 0x3d22e8e3, 0x0, 0x0, 0x0, 0x3e7ebc8d,
0x3cb6a703, 0x3dc6f159, 0x3e939f4c, 0x3cc6d6ae, 0x0, 0x3e5d0622,
0x0, 0x0, 0x3e1b1995, 0x3e2f815b, 0x0, 0x0,
0x0, 0x3bc4a6d9, 0x3e8cf965, 0x3d1a27a2, 0x3eeaaa98, 0x0,
0x0, 0x0, 0x3d8d0cc5, 0x3e916e54, 0x0, 0x3dd4732e,
0x3e7bfcc1, 0x0, 0x3e77d1b0, 0x3d80743a, 0x3c75e0f7, 0x3dc65b26,
0x3cb872dc, 0x3d429fa2, 0x0, 0x0, 0x3de03a8c, 0x3e851e20,
0x0, 0x3dbf8d53, 0x0, 0x0, 0x3d2ee6a4, 0x3e3f6f18,
0x3cc667e5, 0x3be227ae, 0x3d016ffc, 0x3e20fbce, 0x0, 0x3e10e7f8,
0x3e4824d5, 0x0, 0x3eb27828, 0x3e755759, 0x0, 0x0,
0x0, 0x0, 0x3d44254c, 0x3c9c5216, 0x0, 0x0,
0x0, 0x0, 0x0, 0x3dee7547, 0x0, 0x3e909c65,
0x0, 0x0, 0x3dd1f2cf, 0x3d4fffba, 0x3e9ad1f3, 0x3ecf9869,
0x3e851b77, 0x0, 0x0, 0x0, 0x0, 0x3db6f5d4,
0x3cd36c12, 0x3d88ba04, 0x0, 0x0, 0x0, 0x3ed26389,
0x3dfbaafb, 0x3b38fc61, 0x3e97a1f5, 0x3d6752c6, 0x0, 0x3dcd47db,
0x0, 0x3e02e71c, 0x3f02f762, 0x3e4dc44e, 0x0, 0x0,
0x0, 0x0, 0x3cecdfd6, 0x3da0a80e, 0x3bbac653, 0x0,
0x0, 0x0, 0x3e631998, 0x3d3bb59f, 0x3e71dfde, 0x3d2286e5,
0x3e98798d, 0x0, 0x3d6a9f74, 0x3e11df9c, 0x3e5de0f9, 0x3ea99bb4,
0x3e43c72e, 0x0, 0x0, 0x0, 0x3ea5b6ca, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x3e4ee61d, 0x0, 0x3de683ed, 0x3dbff523, 0x0, 0x3e775bb5,
0x0, 0x3e59e88a, 0x3e26091d, 0x0, 0x3dae7906, 0x0,
0x0, 0x0, 0x3d2245bc, 0x3c994d8d, 0x3e4c9d2a, 0x3e0e9a3a,
0x0, 0x3da080bc, 0x3b439034, 0x3d52f123, 0x3dbe7f95, 0x3eeebcbb,
0x3ea507c6, 0x0, 0x3e9538e6, 0x3d3f8f59, 0x3e91f5d3, 0x3e9249cf,
0x3e83c73c, 0x3de597cb, 0x0, 0x0, 0x3ca144b6, 0x3d726a89,
0x3d846794, 0x3dd78c67, 0x0, 0x0, 0x0, 0x0,
0x3e4bd329, 0x3d2304df, 0x3eabc3f0, 0x3d45e53a, 0x0, 0x3e8446f9,
0x0, 0x3e547af2, 0x3f0aa842, 0x3ec9a325, 0x0, 0x0,
0x0, 0x3dc8c105, 0x3d8ec9d0, 0x3d7fd7a1, 0x3e610f6a, 0x0,
0x0, 0x0, 0x3e489cb7, 0x3ec65a7c, 0x3e4ffac6, 0x3d55f9a6,
0x3e9ea98b, 0x0, 0x0, 0x3dd68ecb, 0x3bc64cbd, 0x3e86a09b,
0x3d554cb4, 0x0, 0x0, 0x0, 0x3d4483cf, 0x3e223af9,
0x0, 0x3dd718e7, 0x0, 0x0, 0x0, 0x3e9142f0,
0x3de3e136, 0x3e9a39ce, 0x3efa0610, 0x3d9406ea, 0x0, 0x3e38404b,
0x0, 0x0, 0x3e5f9e81, 0x3e23b360, 0x0, 0x0,
0x0, 0x0, 0x3eb5585c, 0x3dca0b72, 0x3cb1a8b7, 0x0,
0x0, 0x0, 0x3e06e740, 0x3e6ec883, 0x0, 0x3e84a3f8,
0x3dbc54c1, 0x0, 0x3e5fe31b, 0x3d6cdf21, 0x3eed45e5, 0x3e49ef86,
0x3e042067, 0x0, 0x3d8982d5, 0x3cd070fb, 0x3ededc87, 0x3e0a8bfa,
0x0, 0x3d04acd8, 0x0, 0x0, 0x3d582e3d, 0x3db17c5b,
};
test_conv2d(set, strides, input_vals, kernel_vals, bias_vals, output_exp_vals,
output_relu_exp_vals, SAME_PADDING, NULL);
}
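// The tests below exercise zdnn_conv2d() error paths; each expects a specific
// function-specific response code (per the individual test comments that follow):
// F000 invalid padding type, F001 invalid activation function, F002 kernel
// dimension > 448 with zero strides, F003 kernel dimension > 64 with non-zero
// strides, F004 stride > 13.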
// wrong padding type = response code F000
void test_f000_fail() {
input_set i = {1, 1, 1, 1, {1, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {1, 1};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = 0xFF;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F000,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F000");
}
// wrong activation function = response code F001
void test_f001_fail() {
input_set i = {1, 1, 1, 1, {1, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {1, 1};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status =
zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, 0xFF, NULL, output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F001,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F001");
}
// both strides = 0, kernel height > 448
void test_f002_height_fail() {
input_set i = {1, 1, 1, 1, {512, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {0, 0};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->kernel_size[0], set->kernel_size[1],
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F002,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F002");
}
// both strides = 0, kernel width > 448
void test_f002_width_fail() {
input_set i = {1, 1, 1, 1, {1, 512}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {0, 0};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->kernel_size[0], set->kernel_size[1],
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F002,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F002");
}
// both strides > 0, kernel height > 64
void test_f003_height_fail() {
uint32_t bad_height = 70; // output height becomes 11
// height_in must be > kernel_height
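// with VALID padding and stride 1: output height = height_in - kernel_height + 1
//                                                = (70 + 10) - 70 + 1 = 11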
input_set i = {1, bad_height + 10, 1, 1, {bad_height, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {1, 1};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 11, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F003,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F003");
}
// both strides > 0, kernel width > 64
void test_f003_width_fail() {
uint32_t bad_width = 70; // output width becomes 11
// width_in must be > kernel_width
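// with VALID padding and stride 1: output width = width_in - kernel_width + 1
//                                               = (70 + 10) - 70 + 1 = 11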
input_set i = {1, 1, bad_width + 10, 1, {1, bad_width}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {1, 1};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 11, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F003,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F003");
}
// stride height > 13
void test_f004_stride_height_fail() {
uint32_t bad_stride_height = 15;
input_set i = {1, 2, 2, 1, {1, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {bad_stride_height, 1};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 1, 2, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F004,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F004");
}
// stride width > 13
void test_f004_stride_width_fail() {
uint32_t bad_stride_width = 15;
input_set i = {1, 2, 2, 1, {1, 1}, 1};
input_set *set = &i; // just so we can copy and paste code
strides_input_set s = {1, bad_stride_width};
strides_input_set *strides = &s;
zdnn_status status;
zdnn_pool_padding padding = VALID_PADDING;
uint32_t input_dims[4] = {set->n, set->height_in, set->width_in,
set->channel_in};
uint32_t kernel_dims[4] = {set->kernel_size[0], set->kernel_size[1],
set->channel_in, set->channel_out};
uint32_t bias_dims[1] = {set->channel_out};
uint32_t output_dims[4] = {set->n, 2, 1, set->channel_out};
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *kernel_ztensor = alloc_ztensor_with_values(
kernel_dims, ZDNN_HWCK, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *bias_ztensor = alloc_ztensor_with_values(
bias_dims, ZDNN_1D, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_dims, ZDNN_NHWC, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
status = zdnn_conv2d(input_ztensor, kernel_ztensor, bias_ztensor, padding,
strides->height, strides->width, CONV2D_ACT_NONE, NULL,
output_ztensor);
TEST_ASSERT_MESSAGE(status == ZDNN_FUNC_RC_F004,
"zdnn_conv2d(): status not ZDNN_FUNC_RC_F004");
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_valid_padding_non_zero_strides_small);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_valid_padding_non_zero_strides_small_with_clip);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_valid_padding_zero_strides_small);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_same_padding_non_zero_strides_small);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_valid_padding_non_zero_strides_medium);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_valid_padding_zero_strides_medium);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_valid_padding_zero_strides_medium_with_clip);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_same_padding_non_zero_strides_medium);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_valid_padding_non_zero_strides_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_valid_padding_zero_strides_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
test_same_padding_non_zero_strides_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f000_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f001_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f002_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f002_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f003_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f003_width_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f004_stride_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_f004_stride_width_fail);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_div_elwise.c 0000664 0000000 0000000 00000012142 15000221702 0021215 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full div api. Input tensor 1 has values greater than
* those in input tensor 2.
*/
void api_div_basic() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC sized (1,2,2,2)
[[
[[1, 15], [3, 12]],
[[4, 40], [4.5, 15]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 15, 3, 12, 4, 40, 4.5, 15};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[3, 2], [2, 5]],
[[2, 2], [2, 6]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
// test to drive input tensors with 280 values in their buffer
void api_div_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
// test to drive input tensors with 6435 values in their buffer
void api_div_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
/*
* Simple test to drive a full div api using the test data type and
* 3D layout
*/
void api_div_3D() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC sized (1,2,2,2)
[[
[[1, 5], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 5, 2, 20, 4, 40, 5, 50};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[3, 6], [3, 3]],
[[2, 2], [1.8, 1.8]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
/*
* Simple test to drive a full div api using the test data type
* and 2 dimensional tensors
*/
void api_div_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 20]]
]]
*/
float input1_values[] = {1, 10, 2, 20};
/* Input 2 values as true NHWC sized (1,1,2,2)
[[
[[3, 20], [2, 5]]
]]
*/
float input2_values[] = {3, 20, 2, 5};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[0.33333333, 0.5], [1, 4]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
/*
* Simple test to drive a full div api using the test data type
* and 1 dimensional tensors
*/
void api_div_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,1,2)
[[
[[10000, 12000]]
]]
*/
float input1_values[] = {10000, 12000};
/* Input 2 values as true NHWC sized (1,1,1,2)
[[
[[2.5, 4000]]
]]
*/
float input2_values[] = {2.5, 4000};
/* Expected values as true NHWC sized (1,1,1,2)
[[
[[4000, 3]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_DIV, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_div_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_exp_elwise.c 0000664 0000000 0000000 00000010400 15000221702 0021222 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
/**********************************************************
* FP16 tops out at 65504, so no input number larger than
* 11.089866488461016 should be used
**********************************************************/
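// (ln(65504) ~= 11.089866488461016, so keeping exp() inputs at or below that
// bound keeps the results within FP16 range.)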
void tearDown(void) {}
/*
* Simple test to drive a full exp api.
*/
void api_exp_basic() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 4], [6, 7]],
[[10, 9], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input_values[] = {3, 4, 6, 7, 10, 9, 3, 10};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[20.085536923, 54.598150033], [403.42879349, 1096.6331584]],
[[22026.465794, 8103.083926], [20.085536923, 22026.465794]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_EXP, ZDNN_OK);
}
// test to drive an input tensor with 280 values in its buffer.
void api_exp_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_EXP, ZDNN_OK);
}
// test to drive an input tensor with 6435 values in its buffer
void api_exp_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_EXP, ZDNN_OK);
}
/*
* Simple test to drive a full exp api using the test data type
* and 3D layout
*/
void api_exp_3D() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 4], [6, 7]],
[[10, 5], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input_values[] = {3, 4, 6, 7, 10, 5, 3, 10};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[20.085536923, 54.598150033], [403.42879349, 1096.6331584]],
[[22026.465794, 148.41315910], [20.085536923, 22026.465794]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_3D, input_values, NNPA_EXP, ZDNN_OK);
}
/*
* Simple test to drive a full exp api using the test data type
* and 2 dimensional tensors
*/
void api_exp_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 6]]
]]
*/
float input_values[] = {1, 10, 2, 6};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[2.718281828, 22026.465794807], [7.3890560989, 403.42879349]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_2D, input_values, NNPA_EXP, ZDNN_OK);
}
/*
* Simple test to drive a full exp api using the test data type
* and 1 dimensional tensors
*/
void api_exp_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,1,2)
[[
[[6, 7]]
]]
*/
float input_values[] = {6, 7};
/* Expected values as true NHWC sized (1,1,1,2)
[[
[[403.42879349, 1096.6331584]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_1D, input_values, NNPA_EXP, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_exp_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_gelu.c 0000664 0000000 0000000 00000016154 15000221702 0020026 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
tol_bfloat.ulps = MAX_ULPS_BFLOAT;
tol_bfloat.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
tol_fp16.ulps = MAX_ULPS_FP16;
tol_fp16.epsilon_mult = MAX_EPSILON_MULT_FP16;
// note: zdnn_gelu_basic_random_neg_large_3d (FP32) needs custom tolerance
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = (0.003 / EPSILON_FLOAT) + 1;
}
void tearDown(void) {}
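// Reference used to generate expected values: the standard tanh approximation
// of GELU,
//   GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
// where sqrt(2/pi) ~= 0.7978845608.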
float approximate_gelu(float x) {
return 0.5 * x * (1.0 + tanhf(x * 0.7978845608 * (1.0 + 0.044715 * x * x)));
}
/**
* zdnn_gelu_test
*
* Allocates input and output ztensors, invokes zdnn_gelu(), verifies the
* returned status, and (when ZDNN_OK is expected) compares the output
* against expected_values using a per-datatype tolerance.
*/
void zdnn_gelu_test(uint32_t *io_dims, zdnn_data_layouts layout, float *input,
zdnn_status expected_status, float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_gelu(input_ztensor, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_gelu() to returned status %08x but expected %08x\n",
status, expected_status);
// To allow for unique tolerance
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
-------------------------------------------------------------------------------
GeLU Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_gelu_basic_zeros_nhwc
*
* Simple test of all 0 input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[0], [0], [0]],
* [[0], [0], [0]],
* [[0], [0], [0]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[0], [0], [0]],
* [[0], [0], [0]],
* [[0], [0], [0]]
* ]]
*
*/
void zdnn_gelu_basic_zeros_nhwc() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_float_array_zeros(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
gen_float_array_zeros(num_io_buffer_values, expected_values);
zdnn_gelu_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_gelu_basic_negatives_nhwc
*
* Simple test of all negative input values
*
* Input values as NHWC
* [[
* [[-1.1], [-1.2], [-1.3]],
* [[-1.4], [-1.5], [-1.6]],
* [[-1.7], [-1.8], [-1.9]]
* ]]
*
*/
void zdnn_gelu_basic_negatives_nhwc() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[] = {-1.1, -1.2, -1.3, -1.4, -1.5, -1.6, -1.7, -1.8, -1.9};
float expected_values[num_io_buffer_values];
generate_expected_output(approximate_gelu, input_values, num_io_buffer_values,
expected_values);
zdnn_gelu_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_gelu_basic_random_large_nhwc
*
* Simple test of all random input values
*/
void zdnn_gelu_basic_random_large_nhwc() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 10, 30, 60}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
generate_expected_output(approximate_gelu, input_values, num_io_buffer_values,
expected_values);
zdnn_gelu_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
GeLU Basic
Layout: 3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_gelu_basic_random_neg_large_3d
*
* Simple test of all random negative input values
*/
void zdnn_gelu_basic_random_neg_large_3d() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {20, 30, 40}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
generate_expected_output(approximate_gelu, input_values, num_io_buffer_values,
expected_values);
zdnn_gelu_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_gelu_basic_random_large_nhwc
*
* Simple test of all random input values
*/
void zdnn_gelu_basic_random_large_3d() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {20, 30, 40}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
generate_expected_output(approximate_gelu, input_values, num_io_buffer_values,
expected_values);
zdnn_gelu_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_gelu_basic_zeros_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_gelu_basic_negatives_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_gelu_basic_random_large_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_gelu_basic_random_neg_large_3d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_gelu_basic_random_large_3d);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_getrange_ztensor.c 0000664 0000000 0000000 00000016130 15000221702 0022444 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <float.h>
#include <math.h>
#include <vecintrin.h>
typedef vector signed short vec_short;
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
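/*
 * Builds the expected [min, max] on the host: take the raw min/max of the
 * input values, cleanse them through CLEANSE_FP32 (presumably to account for
 * the precision of the on-chip representation), and widen the range so it
 * always spans zero (min <= -0.0, max >= +0.0).  These are the values the
 * zdnn_getrange_ztensor results are compared against below.
 */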
void approximate_min_max(const float *values, const size_t num_values,
float *expected_min, float *expected_max) {
*expected_min = FLT_MAX;
*expected_max = -FLT_MAX;
for (size_t i = 0; i < num_values; ++i) {
*expected_min = fmin(*expected_min, values[i]);
*expected_max = fmax(*expected_max, values[i]);
}
*expected_min = fmin(-0.f, CLEANSE_FP32(*expected_min));
*expected_max = fmax(0.f, CLEANSE_FP32(*expected_max));
}
/**
* zdnn_getrange_ztensor_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_getrange_ztensor_test(uint32_t *dims, zdnn_data_layouts layout,
float *values, zdnn_status expected_status,
float expected_min, float expected_max) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor =
alloc_ztensor_with_values(dims, layout, FP32, NO_CONCAT, false, values);
float min_val, max_val;
/*
* Begin Testing!
*/
zdnn_status status = zdnn_getrange_ztensor(input_ztensor, &min_val, &max_val);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_getrange_ztensor() to returned status %08x but expected "
"%08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
bool all_passed = true;
uint64_t big_error_message_size =
(uint64_t)sizeof(char) * ERROR_MESSAGE_STR_LENGTH * 2;
char *error_msg = malloc(big_error_message_size);
error_msg[0] = '\0'; // malloc'd memory is uninitialized; start with an empty
                     // string so the strlen()-based appends below are defined
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg),
"Min == %f expecting %f", min_val, expected_min);
if (min_val != expected_min) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), " <==== FAILED");
all_passed = false;
}
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg),
"Max == %f expecting %f", max_val, expected_max);
if (max_val != expected_max) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), " <==== FAILED");
all_passed = false;
}
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
TEST_ASSERT_MESSAGE(all_passed, error_msg);
}
// All done--clean up the tensor buffer
zdnn_free_ztensor_buffer(input_ztensor);
}
/**
* zdnn_getrange_ztensor_positive_basic
*/
void zdnn_getrange_ztensor_positive_basic() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {1, 3, 3, 1};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
/**
* zdnn_getrange_ztensor_negative_basic
*/
void zdnn_getrange_ztensor_negative_basic() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {1, 3, 3, 1};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array_neg(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
/**
* zdnn_getrange_ztensor_positive_negative_basic
*/
void zdnn_getrange_ztensor_positive_negative_basic() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {1, 3, 3, 1};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array_pos_neg(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
/**
* zdnn_getrange_ztensor_positive_large
*/
void zdnn_getrange_ztensor_positive_large() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {2, 3, 33, 65};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
/**
* zdnn_getrange_ztensor_negative_large
*/
void zdnn_getrange_ztensor_negative_large() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {2, 3, 33, 65};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array_neg(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
/**
* zdnn_getrange_ztensor_positive_negative_large
*/
void zdnn_getrange_ztensor_positive_negative_large() {
// Initialize the dimensions for our input and output tensors ZDNN_NHWC
uint32_t dims[] = {2, 3, 33, 65};
int num_values = dims[0] * dims[1] * dims[2] * dims[3];
float values[num_values];
gen_random_float_array_pos_neg(num_values, values);
float expected_min, expected_max;
approximate_min_max(values, num_values, &expected_min, &expected_max);
zdnn_getrange_ztensor_test(dims, ZDNN_NHWC, values, ZDNN_OK, expected_min,
expected_max);
}
int main() {
UNITY_BEGIN();
RUN_TEST(zdnn_getrange_ztensor_positive_basic);
RUN_TEST(zdnn_getrange_ztensor_negative_basic);
RUN_TEST(zdnn_getrange_ztensor_positive_negative_basic);
RUN_TEST(zdnn_getrange_ztensor_positive_large);
RUN_TEST(zdnn_getrange_ztensor_negative_large);
RUN_TEST(zdnn_getrange_ztensor_positive_negative_large);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_gru_dual_layers.c 0000664 0000000 0000000 00000142706 15000221702 0022256 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
void setUp(void) {
VERIFY_HW_ENV;
tol_bfloat.ulps = MAX_ULPS_BFLOAT;
tol_bfloat.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
tol_fp16.ulps = MAX_ULPS_FP16;
tol_fp16.epsilon_mult = MAX_EPSILON_MULT_FP16;
if (is_query_parmblock_installed(NNPA_PARMBLKFORMAT_1)) {
// note: gru_bidir_to_bidir (FP32) needs custom tolerance
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = (0.003 / EPSILON_FLOAT) + 1;
} else { // set default tol values
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = MAX_EPSILON_MULT_FLOAT;
}
}
void tearDown(void) {}
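/**
 * test_layer
 *
 * Runs a single GRU layer: builds the h0, weights, biases, hidden_weights and
 * hidden_biases ztensors, calls zdnn_gru(), and verifies the all-timestep
 * output against the expected values.  Each *_values buffer carries the Z, R
 * and H gate tensors back to back, so the second and third gate pointers are
 * derived by offsetting the base pointer by one and two gate-sized strides.
 * The returned all_ts_out ztensor is owned by the caller.
 */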
zdnn_ztensor *test_layer(zdnn_ztensor *input, uint32_t *h0_shape,
void *h0_values, uint32_t *weights_shape,
void *weights_values, uint32_t *biases_shape,
void *biases_values, uint32_t *hidden_weights_shape,
void *hidden_weights_values,
uint32_t *hidden_biases_shape,
void *hidden_biases_values, uint32_t *all_ts_out_shape,
void *all_ts_out_exp_values, bool is_prev_layer_bidir,
bool is_this_layer_bidir) {
zdnn_ztensor *h0, *weights, *biases, *hidden_weights, *hidden_biases,
*all_ts_out;
h0 = alloc_ztensor_with_values(h0_shape, ZDNN_3DS, test_datatype, NO_CONCAT,
false, (float *)h0_values);
// FICO/ZRH elements coming in as one pointer instead of four or three
// pointers
uint32_t num_elements_weights =
weights_shape[0] * weights_shape[1] * weights_shape[2];
weights = alloc_ztensor_with_values(
weights_shape, ZDNN_3DS, test_datatype,
RNN_TYPE_GRU | (is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI) |
USAGE_WEIGHTS,
false, (float *)weights_values,
(float *)weights_values + num_elements_weights,
(float *)weights_values + 2 * num_elements_weights);
uint32_t num_elements_biases = biases_shape[0] * biases_shape[1];
biases = alloc_ztensor_with_values(
biases_shape, ZDNN_2DS, test_datatype, RNN_TYPE_GRU | USAGE_BIASES, false,
(float *)biases_values, (float *)biases_values + num_elements_biases,
(float *)biases_values + 2 * num_elements_biases);
uint32_t num_elements_hidden_weights = hidden_weights_shape[0] *
hidden_weights_shape[1] *
hidden_weights_shape[2];
hidden_weights = alloc_ztensor_with_values(
hidden_weights_shape, ZDNN_3DS, test_datatype,
RNN_TYPE_GRU | USAGE_HIDDEN_WEIGHTS, false,
(float *)hidden_weights_values,
(float *)hidden_weights_values + num_elements_hidden_weights,
(float *)hidden_weights_values + 2 * num_elements_hidden_weights);
uint32_t num_elements_hidden_biases =
hidden_biases_shape[0] * hidden_biases_shape[1];
hidden_biases = alloc_ztensor_with_values(
hidden_biases_shape, ZDNN_2DS, test_datatype,
RNN_TYPE_GRU | USAGE_HIDDEN_BIASES, false, (float *)hidden_biases_values,
(float *)hidden_biases_values + num_elements_hidden_biases,
(float *)hidden_biases_values + 2 * num_elements_hidden_biases);
all_ts_out = alloc_ztensor_with_values(
all_ts_out_shape, ZDNN_4DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_status status =
zdnn_gru(input, h0, weights, biases, hidden_weights, hidden_biases,
is_this_layer_bidir ? BIDIR : FWD, NULL, all_ts_out);
if (status != ZDNN_OK) {
TEST_FAIL_MESSAGE_FORMATTED("%s() - zdnn_gru() not ZDNN_OK, status = %08x",
__func__, status);
}
// To allow for unique tolerance
fp_tolerance *tol = NULL;
switch (all_ts_out->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
  // should never get here
  break;
}
assert_ztensor_values_adv(all_ts_out, false, all_ts_out_exp_values, *tol);
free_ztensor_buffers(5, h0, weights, biases, hidden_weights, hidden_biases);
return all_ts_out;
}
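/*
 * Each *_to_* test below stacks two GRU layers: the all-timestep output of
 * layer 0 (a ZDNN_4DS ztensor) is fed directly as the input of layer 1, and
 * is_prev_layer_bidir tells the weight transformation how the incoming
 * features of the second layer are laid out.
 */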
void gru_fwd_to_fwd() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {false, false};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
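  // Note: the uint32_t arrays in these tests hold IEEE-754 single-precision
  // bit patterns; they are reinterpreted as floats via the (float *) casts,
  // presumably so the inputs and expected outputs stay bit-exact.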
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {1, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4};
uint32_t weights0_shape[] = {1, 4, 4};
uint32_t weights0_values[] = {
0x3e8c896e, 0x3e9ed100, 0x3e493898, 0x3dcbca78, 0xbd813f20, 0x3ef5cf48,
0x3ed41bd0, 0xbede9cf8, 0x3c9c6440, 0xbec54796, 0x3edec5e8, 0x3d3c6690,
0x3ded4400, 0xbe3ba1ec, 0x3ee1e222, 0xbea027ac, 0xbe6311c8, 0x3e4b3424,
0x3dbed0f8, 0x3e67aa8c, 0xbc93cb20, 0x3d8a55c8, 0xbec67a6c, 0x3e4de7f8,
0x3ea3ba40, 0x3eaba4fe, 0xbeb16e18, 0xbe97a46a, 0x3efe7f82, 0xbe96bbc0,
0xbe843ed2, 0x3e1aadc8, 0xbeee948c, 0x3dbfaa08, 0x3e44e48c, 0x3eb7435c,
0x3ee3743e, 0xbdac80c8, 0xbe97a134, 0x3e3f7148, 0x3ec2a6f0, 0xbda882b8,
0x3e3bb1dc, 0xbefd5f4a, 0xbeff5dfe, 0xbe6a5f1c, 0x3e817616, 0xbea61100};
uint32_t biases0_shape[] = {1, 4};
uint32_t biases0_values[] = {0xbe7e7cdc, 0xbec42d02, 0xbeef9400, 0xbed810ec,
0xbee5f866, 0xbe72ba40, 0x3eae4fce, 0xbb546b00,
0xbdcfb470, 0x3e8c6456, 0x3e8a6774, 0xbef6e502};
uint32_t hidden_weights0_shape[] = {1, 4, 4};
uint32_t hidden_weights0_values[] = {
0xbe9bc938, 0xbdfedb88, 0x3e48ec5c, 0xbef59156, 0x3ee84968, 0x3cb8d280,
0x3cf559e0, 0xbe97bba0, 0x3eaf6fd0, 0x3d956718, 0xbe79d1cc, 0xbe002b3c,
0xbed7e164, 0x3df2ddd0, 0x3e7245d4, 0xbe7966ec, 0x3e7fa638, 0x3ef0d4f2,
0xbef3dd78, 0xbce1b020, 0x3dd65318, 0x3eac7f54, 0xbc133e80, 0x3d99bfa0,
0xbec7b396, 0xbe5f3eb0, 0xbec0c878, 0xbe51adf4, 0xbe9368e8, 0xbe00dcd4,
0xbec577a2, 0x3e97e798, 0xbe331d5c, 0xbea6c676, 0x3c63ac80, 0x3ef27eba,
0xbed1d2ba, 0xbcd23440, 0xbd30adc0, 0x3ea29306, 0xbdd9d200, 0x3eb74200,
0x3dcf3d10, 0x3ef30cc4, 0x3ddae2f8, 0xbd5288f0, 0x3ea2c660, 0xbd141d60};
uint32_t hidden_biases0_shape[] = {1, 4};
uint32_t hidden_biases0_values[] = {
0xbdc53cf0, 0xbdc90e10, 0xbee5b8ba, 0xbedd0a44, 0x3e9fb02e, 0xbec8528a,
0xbdd87bf0, 0xbe4f9a1c, 0xbda03c28, 0x3bd3e180, 0xbe6896b8, 0x3e40deb8};
uint32_t all_ts_out0_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0x3dbbe338, 0x3e91c6be, 0x3f53d36d, 0xbee51fe8, 0x3f248241, 0x3f14fbd0,
0x3ed3b798, 0x3ed452e7, 0xbdfe4e88, 0x3e12cda1, 0x3f3e34b7, 0xbf103a51,
0x3e33cc72, 0x3e93fd0d, 0x3ee42df8, 0x3d790f32, 0xbe6f1ec6, 0x3c36a710,
0x3f380cab, 0xbf4b6960, 0x3ce736e8, 0x3e6fd98a, 0x3ef5e203, 0xbedb0b88,
0xbedb1a27, 0xbc8798ec, 0x3f1fe987, 0xbecfbd8c, 0x3cd23b72, 0x3e817b76,
0x3edf4e9e, 0xbe603530, 0xbeee1f9e, 0xbda1fc72, 0x3f24978b, 0xbf2e6ce5,
0xbe8638b3, 0x3e90d126, 0x3f01ed5f, 0xbdfc585c};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 = test_layer(
input0, h00_shape, (void *)h00_values, weights0_shape,
(void *)weights0_values, biases0_shape, (void *)biases0_values,
hidden_weights0_shape, (void *)hidden_weights0_values,
hidden_biases0_shape, (void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {1, 2, 5};
uint32_t h01_values[] = {0x3ff3351d, 0x3f88a36c, 0x3df6f2f0, 0x3fb435ce,
0x3dc6a650, 0x3f2349e4, 0x3ff383d7, 0x3ebc0f18,
0x3f8ec53f, 0x3fb923dc};
uint32_t weights1_shape[] = {1, 4, 5};
uint32_t weights1_values[] = {
0xbe8872c0, 0xbead15e7, 0xbe249da6, 0xbb5a2c80, 0xbb4def80, 0xbc1fdba0,
0xbe9d3169, 0x3ec122ee, 0xbdb50e24, 0x3d69b920, 0x3c9a1ea0, 0xbe84f4be,
0x3e2598d4, 0x3d65d3e0, 0xbeb31aa8, 0x3e9399fc, 0x3ea04420, 0x3d67f3b0,
0xbdd123b0, 0xbe700636, 0x3eb7196c, 0x3ea38344, 0xbddc3fcc, 0x3eb5ccc2,
0xbea16940, 0xbeb90843, 0x3dffaaa8, 0xbdc09e0c, 0x3e9cab54, 0xbe7c17a9,
0x3d448f50, 0x3e5c0bbc, 0xbebcb154, 0xbea1834a, 0xbe856c8e, 0xbdbfc268,
0x3e21ba5c, 0x3e7822c0, 0x3ca36520, 0x3e1c8044, 0x3eb8e4f2, 0x3e256d64,
0xbea317e4, 0x3ba04b00, 0x3e8c10dc, 0xbeb9d294, 0x3c4f7420, 0xbe01fea6,
0x3ebcdbe4, 0xbe90c29a, 0xbd0388a0, 0xbec66e3b, 0xbe19a60a, 0x3d64ada0,
0x3e4d6418, 0x3ee28262, 0x3e62db50, 0xbd87a1dc, 0x3ecd16fc, 0xbea1dc41};
uint32_t biases1_shape[] = {1, 5};
uint32_t biases1_values[] = {0x3e3d5c4c, 0x3d0f9250, 0x3d190310, 0x3d64ba10,
0xbeb401c3, 0xbe271822, 0x3e2dd3f4, 0x3e987cda,
0xbe6f1e1b, 0x3b9084c0, 0x3d0ff4f8, 0x3e9e8ea0,
0x3ece54c0, 0x3e86944e, 0x3d757710};
uint32_t hidden_weights1_shape[] = {1, 5, 5};
uint32_t hidden_weights1_values[] = {
0x3eafbdf8, 0x3ebc395c, 0x3dc5e080, 0xbd506310, 0x3eb682ba, 0x3e261b9c,
0x3df90638, 0xbe807ef0, 0x3e0332e4, 0x3d952498, 0xbe18ef8e, 0xbe58ace5,
0xbecc25a9, 0x3c696e40, 0x3ebdf640, 0xbdfff1d8, 0xbe574539, 0x3ec8c806,
0xbe9a126e, 0xbe1985b8, 0x3e074a8c, 0xbed87cba, 0xbe94b2a7, 0xbeb9158a,
0x3e06e404, 0xbe4216de, 0x3a3bae00, 0x3bc9f900, 0xbe05dde4, 0xbe5bef69,
0x3e06b148, 0x3e6bc304, 0xbd9bb79c, 0xbe87f0ac, 0xbe98cd9b, 0x3e1735dc,
0xbedd7037, 0xbe71302b, 0xbe295dd2, 0xbe83e971, 0x3eabc840, 0x3ea58b16,
0x3d721bf0, 0xbee2f252, 0x3e83a64e, 0xbe136b9a, 0xbebd57dc, 0x3ebd57a4,
0x3e4eb6e0, 0x3e72843c, 0xbdd1716c, 0xbc172600, 0x3e3b9ae0, 0x3dd306b8,
0x3e354500, 0xbca2ec60, 0xbdcdfc84, 0xbe19fc78, 0x3db3dd28, 0xbd669538,
0xbe8b7474, 0xbe8d2560, 0xbe5cf1d4, 0xbeaa02a3, 0xbebbb5a4, 0x3e1ae0c4,
0x3e9e5868, 0x3da48928, 0x3d987eb0, 0xbd8d3050, 0x3e10c984, 0xbeaa740b,
0xbe6de235, 0x3e430d88, 0x3e1f0c64};
uint32_t hidden_biases1_shape[] = {1, 5};
uint32_t hidden_biases1_values[] = {
0x3e915990, 0xbe9e462c, 0x3e332b14, 0x3eace9cc, 0x3ee4e29a,
0x3e55de1c, 0xbe5ec821, 0xbebdbf60, 0xbec4e626, 0x3ee46d12,
0x3ec83690, 0x3eb165e2, 0xbdd1fa20, 0xbe20b66c, 0x3ebbff92};
uint32_t all_ts_out1_shape[] = {5, 1, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0x3faed879, 0x3f212916, 0x3e315ac6, 0x3f584762, 0x3e773eca, 0x3f1a0e70,
0x3f30b93c, 0x3cd3b530, 0x3f55a8be, 0x3f7349dc, 0x3f67b47e, 0x3ea2e278,
0x3e728624, 0x3f05967e, 0x3eae0ece, 0x3f01eb96, 0x3ed38c5e, 0x3d76c0f6,
0x3f1f4629, 0x3f31922a, 0x3f018d7e, 0x3db2f8c8, 0x3e962ef5, 0x3e91c9db,
0x3ed82154, 0x3eab6769, 0x3e736499, 0x3e04e914, 0x3eda874c, 0x3f183a7d,
0x3e91da9e, 0x3cec7d07, 0x3eaa587e, 0x3e519080, 0x3ed40a7e, 0x3e783466,
0x3e5bb505, 0x3e2863f0, 0x3eb7d4c3, 0x3efefd82, 0x3daf16aa, 0xbd5aa005,
0x3eba1ae9, 0x3dcca13b, 0x3edeb73c, 0x3e1b0164, 0x3e316c76, 0x3e5809ce,
0x3eaea292, 0x3ec931ed};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, weights1_shape,
(void *)weights1_values, biases1_shape, (void *)biases1_values,
hidden_weights1_shape, (void *)hidden_weights1_values,
hidden_biases1_shape, (void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
void gru_fwd_to_bidir() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {false, true};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {1, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4};
uint32_t weights0_shape[] = {1, 4, 4};
uint32_t weights0_values[] = {
0x3e8c896e, 0x3e9ed100, 0x3e493898, 0x3dcbca78, 0xbd813f20, 0x3ef5cf48,
0x3ed41bd0, 0xbede9cf8, 0x3c9c6440, 0xbec54796, 0x3edec5e8, 0x3d3c6690,
0x3ded4400, 0xbe3ba1ec, 0x3ee1e222, 0xbea027ac, 0xbe6311c8, 0x3e4b3424,
0x3dbed0f8, 0x3e67aa8c, 0xbc93cb20, 0x3d8a55c8, 0xbec67a6c, 0x3e4de7f8,
0x3ea3ba40, 0x3eaba4fe, 0xbeb16e18, 0xbe97a46a, 0x3efe7f82, 0xbe96bbc0,
0xbe843ed2, 0x3e1aadc8, 0xbeee948c, 0x3dbfaa08, 0x3e44e48c, 0x3eb7435c,
0x3ee3743e, 0xbdac80c8, 0xbe97a134, 0x3e3f7148, 0x3ec2a6f0, 0xbda882b8,
0x3e3bb1dc, 0xbefd5f4a, 0xbeff5dfe, 0xbe6a5f1c, 0x3e817616, 0xbea61100};
uint32_t biases0_shape[] = {1, 4};
uint32_t biases0_values[] = {0xbe7e7cdc, 0xbec42d02, 0xbeef9400, 0xbed810ec,
0xbee5f866, 0xbe72ba40, 0x3eae4fce, 0xbb546b00,
0xbdcfb470, 0x3e8c6456, 0x3e8a6774, 0xbef6e502};
uint32_t hidden_weights0_shape[] = {1, 4, 4};
uint32_t hidden_weights0_values[] = {
0xbe9bc938, 0xbdfedb88, 0x3e48ec5c, 0xbef59156, 0x3ee84968, 0x3cb8d280,
0x3cf559e0, 0xbe97bba0, 0x3eaf6fd0, 0x3d956718, 0xbe79d1cc, 0xbe002b3c,
0xbed7e164, 0x3df2ddd0, 0x3e7245d4, 0xbe7966ec, 0x3e7fa638, 0x3ef0d4f2,
0xbef3dd78, 0xbce1b020, 0x3dd65318, 0x3eac7f54, 0xbc133e80, 0x3d99bfa0,
0xbec7b396, 0xbe5f3eb0, 0xbec0c878, 0xbe51adf4, 0xbe9368e8, 0xbe00dcd4,
0xbec577a2, 0x3e97e798, 0xbe331d5c, 0xbea6c676, 0x3c63ac80, 0x3ef27eba,
0xbed1d2ba, 0xbcd23440, 0xbd30adc0, 0x3ea29306, 0xbdd9d200, 0x3eb74200,
0x3dcf3d10, 0x3ef30cc4, 0x3ddae2f8, 0xbd5288f0, 0x3ea2c660, 0xbd141d60};
uint32_t hidden_biases0_shape[] = {1, 4};
uint32_t hidden_biases0_values[] = {
0xbdc53cf0, 0xbdc90e10, 0xbee5b8ba, 0xbedd0a44, 0x3e9fb02e, 0xbec8528a,
0xbdd87bf0, 0xbe4f9a1c, 0xbda03c28, 0x3bd3e180, 0xbe6896b8, 0x3e40deb8};
uint32_t all_ts_out0_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0x3dbbe338, 0x3e91c6be, 0x3f53d36d, 0xbee51fe8, 0x3f248241, 0x3f14fbd0,
0x3ed3b798, 0x3ed452e7, 0xbdfe4e88, 0x3e12cda1, 0x3f3e34b7, 0xbf103a51,
0x3e33cc72, 0x3e93fd0d, 0x3ee42df8, 0x3d790f32, 0xbe6f1ec6, 0x3c36a710,
0x3f380cab, 0xbf4b6960, 0x3ce736e8, 0x3e6fd98a, 0x3ef5e203, 0xbedb0b88,
0xbedb1a27, 0xbc8798ec, 0x3f1fe987, 0xbecfbd8c, 0x3cd23b72, 0x3e817b76,
0x3edf4e9e, 0xbe603530, 0xbeee1f9e, 0xbda1fc72, 0x3f24978b, 0xbf2e6ce5,
0xbe8638b3, 0x3e90d126, 0x3f01ed5f, 0xbdfc585c};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 = test_layer(
input0, h00_shape, (void *)h00_values, weights0_shape,
(void *)weights0_values, biases0_shape, (void *)biases0_values,
hidden_weights0_shape, (void *)hidden_weights0_values,
hidden_biases0_shape, (void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {2, 2, 5};
uint32_t h01_values[] = {0x3ff3351d, 0x3f88a36c, 0x3df6f2f0, 0x3fb435ce,
0x3dc6a650, 0x3f2349e4, 0x3ff383d7, 0x3ebc0f18,
0x3f8ec53f, 0x3fb923dc, 0x3e4d27b0, 0x3fe76faa,
0x3f6487aa, 0x3f9acc98, 0x3e925fd4, 0x3f3889fc,
0x3f04fd9a, 0x3f259760, 0x3f9ec7e0, 0x3f9aeb4a};
uint32_t weights1_shape[] = {2, 4, 5};
uint32_t weights1_values[] = {
0x3e2598d4, 0x3d65d3e0, 0xbeb31aa8, 0xbd0388a0, 0xbec66e3b, 0x3d67f3b0,
0xbdd123b0, 0xbe700636, 0x3ee28262, 0x3e62db50, 0xbb5a2c80, 0xbb4def80,
0x3eb8e4f2, 0x3e256d64, 0xbea317e4, 0xbdb50e24, 0x3d69b920, 0xbeb9d294,
0x3c4f7420, 0xbe01fea6, 0xbe5f6ed0, 0x3ebb7968, 0xbdea86c4, 0x3ee4e636,
0x3e5bbc44, 0xbedd358e, 0x3ea3e864, 0x3e497f5c, 0x3e6d851c, 0x3d527bf8,
0xbe81ffa5, 0x3eb0cbec, 0x3ecbffd4, 0x3e8e5746, 0xbddabb30, 0x3ebc5350,
0x3ecb999a, 0x3e177f54, 0xbe20471c, 0xbe811315, 0xbebcb154, 0xbea1834a,
0xbe856c8e, 0x3c9a1ea0, 0xbe84f4be, 0x3e7822c0, 0x3ca36520, 0x3e1c8044,
0x3e9399fc, 0x3ea04420, 0x3eb5ccc2, 0xbea16940, 0xbe8872c0, 0xbead15e7,
0xbe249da6, 0x3e9cab54, 0xbe7c17a9, 0xbc1fdba0, 0xbe9d3169, 0x3ec122ee,
0x3e3248cc, 0x3e5f481c, 0xbee1e40d, 0x3ed6d390, 0xbd93fe10, 0x3da2aec0,
0xbe9fee66, 0xbeb7e0dd, 0x3eb76f78, 0xbe94b3e4, 0x3e42d780, 0x3dcedbf0,
0x3eb4c482, 0xbecc7bce, 0x3e9eff90, 0xbe1b9f76, 0xbe9aebe8, 0x3e77c3f8,
0xbe9c4230, 0xbead1b0c, 0xbe19a60a, 0x3d64ada0, 0x3e4d6418, 0x3e1735dc,
0x3e6bc304, 0xbd87a1dc, 0x3ecd16fc, 0xbea1dc41, 0x3eabc840, 0xbedd7037,
0x3ba04b00, 0x3e8c10dc, 0xbe4216de, 0xbe136b9a, 0x3ea58b16, 0x3ebcdbe4,
0xbe90c29a, 0x3e06b148, 0x3a3bae00, 0xbebd57dc, 0xbe807b12, 0xbd507b08,
0x3d082a00, 0xbeadd2f6, 0x3e80b7ea, 0xbeb030cc, 0xbe8480f1, 0xbe58367b,
0x3edb2580, 0xbe8219a4, 0x3e99b77e, 0x3eb0f98a, 0x3ed26ffe, 0x3eade05a,
0xbd8f889c, 0x3ea2c8c8, 0x3e926d0a, 0x3e3b45e4, 0xbe26eada, 0x3ec26bea};
uint32_t biases1_shape[] = {2, 5};
uint32_t biases1_values[] = {
0x3e55de1c, 0xbe5ec821, 0xbebdbf60, 0xbec4e626, 0x3ee46d12, 0xbea06a9a,
0x3ec02c1a, 0xbd472b98, 0xbebfde02, 0xbe77c691, 0x3d0ff4f8, 0x3e9e8ea0,
0x3ece54c0, 0x3e86944e, 0x3d757710, 0x3c6b9ce0, 0xbe0d9648, 0xbdc724ec,
0x3d737210, 0x3e630230, 0x3e915990, 0xbe9e462c, 0x3e332b14, 0x3eace9cc,
0x3ee4e29a, 0x3eca9984, 0x3ed16702, 0xbed417e7, 0x3ea17e98, 0x3e658114};
uint32_t hidden_weights1_shape[] = {2, 5, 5};
uint32_t hidden_weights1_values[] = {
0x3dc5e080, 0xbd506310, 0x3eb682ba, 0xbdd1716c, 0xbc172600, 0xbe807ef0,
0x3e0332e4, 0x3d952498, 0xbca2ec60, 0xbdcdfc84, 0xbecc25a9, 0x3c696e40,
0x3ebdf640, 0xbe8b7474, 0xbe8d2560, 0x3ec8c806, 0xbe9a126e, 0xbe1985b8,
0x3e1ae0c4, 0x3e9e5868, 0xbe94b2a7, 0xbeb9158a, 0x3e06e404, 0x3e10c984,
0xbeaa740b, 0x3e63833c, 0x3e99f81e, 0x3ca711d0, 0x3e675c3c, 0xbeb798f6,
0xbecfe0c1, 0xbed8b7ed, 0xbece783b, 0xbe972362, 0xbe03b7b6, 0xbedc1c4e,
0x3ebe51d8, 0x3ebde4ee, 0x3ebf18f2, 0xbee0d2e5, 0xbede7c01, 0xbe37306c,
0x3e769414, 0x3cc4e590, 0xbe325de8, 0xbdb9cd1c, 0x3e062014, 0x3ee39938,
0x3e592a78, 0x3dc59638, 0x3bc9f900, 0xbe05dde4, 0xbe5bef69, 0x3eafbdf8,
0x3ebc395c, 0xbd9bb79c, 0xbe87f0ac, 0xbe98cd9b, 0x3e261b9c, 0x3df90638,
0xbe71302b, 0xbe295dd2, 0xbe83e971, 0xbe18ef8e, 0xbe58ace5, 0x3d721bf0,
0xbee2f252, 0x3e83a64e, 0xbdfff1d8, 0xbe574539, 0x3ebd57a4, 0x3e4eb6e0,
0x3e72843c, 0x3e074a8c, 0xbed87cba, 0x3c976030, 0x3d2f4d98, 0x3e5b9460,
0xbe436636, 0x3cf049b0, 0xbea1ef22, 0x3ed3c2e8, 0x3e6328f4, 0x3e24fec4,
0xbe989ba1, 0xbe190f96, 0x3cc42620, 0xbed14480, 0xbea299d4, 0xbe24134e,
0xbdf89d64, 0xbe8d6097, 0xbda3e468, 0x3e2a3b28, 0x3dc7ff90, 0xbdb0b3c4,
0x3cbbc620, 0xbeaa2909, 0x3ec258fa, 0xbeae8cee, 0x3e3b9ae0, 0x3dd306b8,
0x3e354500, 0xbe271822, 0x3e3d5c4c, 0xbe19fc78, 0x3db3dd28, 0xbd669538,
0x3e2dd3f4, 0x3d0f9250, 0xbe5cf1d4, 0xbeaa02a3, 0xbebbb5a4, 0x3e987cda,
0x3d190310, 0x3da48928, 0x3d987eb0, 0xbd8d3050, 0xbe6f1e1b, 0x3d64ba10,
0xbe6de235, 0x3e430d88, 0x3e1f0c64, 0x3b9084c0, 0xbeb401c3, 0xbeb3c7f8,
0xbeb1870e, 0xbd4e46b0, 0xbe81b1a9, 0x3e6ef9a8, 0x3e11fa20, 0xbe0d48c0,
0x3e20904c, 0x3e5c50f0, 0xbd3aa670, 0x3e75d434, 0x3e4904fc, 0xbee0a324,
0xbea1a3c0, 0x3eb037d8, 0x3d7f2f50, 0x3ee1dbc6, 0xbec39102, 0xbe62d375,
0x3e8db48a, 0xbe9933c8, 0x3e83aa94, 0x3e55ae7c, 0xbebc9a53, 0x3e7d66c4};
uint32_t hidden_biases1_shape[] = {2, 5};
uint32_t hidden_biases1_values[] = {
0x3e804fe0, 0xbe89ca96, 0x3ecdd9da, 0x3e5d42c8, 0x3e79a49c, 0xbe0751fa,
0x3e1940d8, 0xbe03c1e6, 0x3e8d90bc, 0xbdfe1e6c, 0x3ec83690, 0x3eb165e2,
0xbdd1fa20, 0xbe20b66c, 0x3ebbff92, 0x3e878898, 0x3ec1d528, 0xbe76cf7f,
0x3e109bc4, 0x3e3b6830, 0xbe8f83dc, 0x3e036284, 0xbe2089f6, 0x3eb2e8ec,
0xbda4ce70, 0xbe2ab878, 0x3de69348, 0x3e226e48, 0xbe5fbd62, 0x3d21ed48};
uint32_t all_ts_out1_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0x3fb07596, 0x3f0724ff, 0x3d69c926, 0x3f5b7fa7, 0x3e9f50e4, 0x3e47b948,
0x3f169176, 0x3e5bd372, 0x3f6ed378, 0x3f48821b, 0x3e88bf42, 0x3f1503cf,
0xbe228c59, 0x3f0cf024, 0x3d87f2ca, 0x3e298037, 0x3f0cee29, 0xbe29a96c,
0x3e9dca6b, 0x3ec26071, 0x3f6dcbb8, 0x3ebb0660, 0x3c74d650, 0x3f117062,
0x3ee3a265, 0x3db18655, 0x3e677394, 0x3db978f5, 0x3f3bbe5a, 0x3f1d1df8,
0x3e9c2766, 0x3f1e9c69, 0xbe214906, 0x3f0e5d23, 0x3d5ee33f, 0x3e809bf2,
0x3f0fa73a, 0xbe160d84, 0x3ec20162, 0x3e9546b7, 0x3f0d0270, 0x3ea79919,
0xbc9908d0, 0x3ed83b4c, 0x3f0a1d62, 0xbba4d360, 0x3e5a624c, 0x3a3d5a00,
0x3f19d3f1, 0x3f137653, 0x3ea3d178, 0x3f33a091, 0xbdfac5f0, 0x3f118a1b,
0x3db83b0a, 0x3e44b9c6, 0x3f10e4c8, 0xbdc43ac6, 0x3edf58bb, 0x3e902384,
0x3ea54086, 0x3e589718, 0xbd23b7e4, 0x3eb6e5d1, 0x3f0c00aa, 0xbd1787d8,
0x3e2d45a3, 0xbd1be2d7, 0x3f094a68, 0x3f050376, 0x3ef25316, 0x3f66231a,
0xbbf4ad80, 0x3f1f5c9d, 0x3e57ea78, 0x3e7b6112, 0x3f182175, 0x3da612d0,
0x3f04cef6, 0x3ee8d645, 0x3e136cf0, 0x3e5768e7, 0xbd873546, 0x3ea25dfa,
0x3f133a2c, 0xbce72cb2, 0x3e1813fb, 0xbdb1ffc8, 0x3ef86608, 0x3eea8306,
0x3f02832e, 0x3f97b462, 0x3e814796, 0x3f4e1d12, 0x3e7082c4, 0x3ea66204,
0x3f147ff7, 0x3ebb4a06, 0x3f477b01, 0x3f2985fa};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, weights1_shape,
(void *)weights1_values, biases1_shape, (void *)biases1_values,
hidden_weights1_shape, (void *)hidden_weights1_values,
hidden_biases1_shape, (void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
void gru_bidir_to_bidir() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {true, true};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {2, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4,
0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0};
uint32_t weights0_shape[] = {2, 4, 4};
uint32_t weights0_values[] = {
0x3e493898, 0x3dcbca78, 0xbeee948c, 0x3dbfaa08, 0x3ed41bd0, 0xbede9cf8,
0x3ee3743e, 0xbdac80c8, 0x3edec5e8, 0x3d3c6690, 0x3ec2a6f0, 0xbda882b8,
0x3ee1e222, 0xbea027ac, 0xbeff5dfe, 0xbe6a5f1c, 0xbeb493ac, 0xbe952c30,
0x3cac4fa0, 0xbe94a63c, 0x3cb6ae60, 0x3e2ef934, 0x3ea50604, 0x3eb32ed6,
0xbeb47690, 0xbe988dc4, 0xbec183fa, 0xbe380bcc, 0xbe8cec88, 0xbc32ba00,
0xbeafbf44, 0x3ed7eee0, 0x3dbed0f8, 0x3e67aa8c, 0x3e8c896e, 0x3e9ed100,
0xbec67a6c, 0x3e4de7f8, 0xbd813f20, 0x3ef5cf48, 0xbeb16e18, 0xbe97a46a,
0x3c9c6440, 0xbec54796, 0xbe843ed2, 0x3e1aadc8, 0x3ded4400, 0xbe3ba1ec,
0xbd6c53f0, 0x3d5bc2b0, 0x3e7604cc, 0xbed2f700, 0xbe648f70, 0xbdd664c0,
0x3e34d140, 0x3e8ab64c, 0x3eccb614, 0x3eb6d016, 0xbdf63f00, 0x3ecb4226,
0xbecedf54, 0x3e0eec08, 0xbdd75a50, 0x3eaf295c, 0x3e44e48c, 0x3eb7435c,
0x3e7fa638, 0x3ef0d4f2, 0xbe97a134, 0x3e3f7148, 0x3dd65318, 0x3eac7f54,
0x3e3bb1dc, 0xbefd5f4a, 0xbec7b396, 0xbe5f3eb0, 0x3e817616, 0xbea61100,
0xbe9368e8, 0xbe00dcd4, 0x3e3924a0, 0x3d807a40, 0xbec83e98, 0xbd130f20,
0x3d81aa40, 0xbde9d330, 0xbe862d7a, 0x3efd3ec0, 0xbb73ed00, 0xbb663e00,
0x3eceb7d8, 0x3e38f410, 0xbdca6d08, 0x3d82a7c0, 0xbecfc186, 0x3c67f0c0};
uint32_t biases0_shape[] = {2, 4};
uint32_t biases0_values[] = {
0x3e9fb02e, 0xbec8528a, 0xbdd87bf0, 0xbe4f9a1c, 0x3ee07afa, 0xbea63fd0,
0xbd68fbd0, 0x3e12af48, 0xbdcfb470, 0x3e8c6456, 0x3e8a6774, 0xbef6e502,
0xbef20a42, 0x3ddd3bb0, 0xbe8fa9a8, 0xbee43e50, 0xbdc53cf0, 0xbdc90e10,
0xbee5b8ba, 0xbedd0a44, 0x3c827de0, 0xbeac41fa, 0xbeceee2c, 0x3ecc0d98};
uint32_t hidden_weights0_shape[] = {2, 4, 4};
uint32_t hidden_weights0_values[] = {
0x3e48ec5c, 0xbef59156, 0xbe331d5c, 0xbea6c676, 0x3cf559e0, 0xbe97bba0,
0xbed1d2ba, 0xbcd23440, 0xbe79d1cc, 0xbe002b3c, 0xbdd9d200, 0x3eb74200,
0x3e7245d4, 0xbe7966ec, 0x3ddae2f8, 0xbd5288f0, 0x3e290ef0, 0x3e83cb7a,
0x3be1d000, 0x3ed3b0f2, 0x3ec00ef2, 0xbef7935a, 0xbdae18e0, 0xbe15aae8,
0xbe24d228, 0x3eb91542, 0xbe86d40a, 0xbe97fc56, 0x3a51d400, 0xbed3b130,
0x3d8757d8, 0xbe3d5b84, 0xbef3dd78, 0xbce1b020, 0xbe9bc938, 0xbdfedb88,
0xbc133e80, 0x3d99bfa0, 0x3ee84968, 0x3cb8d280, 0xbec0c878, 0xbe51adf4,
0x3eaf6fd0, 0x3d956718, 0xbec577a2, 0x3e97e798, 0xbed7e164, 0x3df2ddd0,
0xbeddda26, 0xbe2bc8cc, 0x3d7fab80, 0x3e65a254, 0x3e7da22c, 0xbd97a438,
0x3ee54c20, 0xbeb4f724, 0xbeb65808, 0x3bb33680, 0x3e9c9930, 0xbe58ff9c,
0xbe1156a8, 0x3ed32696, 0xbea1d8c6, 0x3e169740, 0x3c63ac80, 0x3ef27eba,
0xbee5f866, 0xbe7e7cdc, 0xbd30adc0, 0x3ea29306, 0xbe72ba40, 0xbec42d02,
0x3dcf3d10, 0x3ef30cc4, 0x3eae4fce, 0xbeef9400, 0x3ea2c660, 0xbd141d60,
0xbb546b00, 0xbed810ec, 0xbefdbbe6, 0xbe937b62, 0x3e39b6d8, 0x3ed270de,
0x3e671d1c, 0x3e933052, 0xbe2afcc4, 0x3e0b3574, 0xbe75e520, 0x3e879224,
0xbe0f13d4, 0xbe72401c, 0xbeaad6d0, 0x3ec47c50, 0x3e174298, 0xbe70adfc};
uint32_t hidden_biases0_shape[] = {2, 4};
uint32_t hidden_biases0_values[] = {
0xbee66a3a, 0xbd0a36c0, 0x3ee121a2, 0xbe50d738, 0xbdea2a18, 0xbcb62760,
0xbe9bea52, 0x3e2d28ac, 0xbda03c28, 0x3bd3e180, 0xbe6896b8, 0x3e40deb8,
0x3da6bf30, 0x3ed46246, 0xbe2ba4a8, 0x3e16cff8, 0x3ee72b36, 0x3e396c38,
0xbee707ae, 0x3ea1f874, 0x3e21e080, 0xbc28fd40, 0xbde64cc0, 0xbe9dce58};
uint32_t all_ts_out0_shape[] = {5, 2, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0x3f6bce30, 0x3beaf6b0, 0xbec8dfee, 0xbd929bc0, 0x3fb6725f, 0x3f5a6c8b,
0xbee4e44b, 0xbdb632bc, 0x3d9293c4, 0xbeb2a8df, 0xbf3182a1, 0x3f098928,
0x3e809ad9, 0xbeaffc95, 0xbf27c972, 0x3f331a57, 0x3f5aaeda, 0xbe80c748,
0xbf27f391, 0xbc2d591f, 0x3faa62a2, 0x3f0a4d52, 0xbf36fb1c, 0x3ec311f0,
0x3e085652, 0xbec6dc8d, 0xbf337790, 0x3efc9499, 0x3e91c071, 0xbe90d269,
0xbf2fab83, 0x3f3dd627, 0x3f50bbdd, 0xbf231060, 0xbf498fe4, 0xbea2a61f,
0x3f8e5d9f, 0x3c3ce7d0, 0xbf4c17b3, 0xbe4c7e78, 0x3dd54579, 0xbecb384e,
0xbf1ee32c, 0x3ed38a87, 0x3e22ecd6, 0xbe6019ea, 0xbebcd226, 0x3f30504a,
0x3f4025f6, 0xbe8eb1d6, 0xbf4b2853, 0x3eac30ec, 0x3f79e857, 0x3e21b530,
0xbf4bce96, 0x3ecebfe8, 0x3e88787f, 0xbe832944, 0xbf56da6c, 0x3e9e64a2,
0x3ea35eab, 0xbd986300, 0xbf12712f, 0x3f597df2, 0x3f3d9310, 0xbf083670,
0xbf5b102b, 0x3ec92c09, 0x3f5df7b3, 0x3e6c9d73, 0xbf43d815, 0x3e9aa38c,
0x3f09f861, 0x3d10f6e8, 0xbf26dab4, 0x3e60c564, 0x3ea667e6, 0x3ee0c930,
0xbed2d9b6, 0x3f655092};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 = test_layer(
input0, h00_shape, (void *)h00_values, weights0_shape,
(void *)weights0_values, biases0_shape, (void *)biases0_values,
hidden_weights0_shape, (void *)hidden_weights0_values,
hidden_biases0_shape, (void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {2, 2, 5};
uint32_t h01_values[] = {0x3e9dedd8, 0x3fdf494a, 0x3f17202a, 0x3fab0a5b,
0x3fbdc183, 0x3f5202c8, 0x3fc27d91, 0x3f450430,
0x3f4db9fc, 0x3fdf09e5, 0x3f55605a, 0x3f12f64e,
0x3f1aaad2, 0x3f901ccb, 0x3fe8eecd, 0x3f93bb52,
0x3f2716d8, 0x3faeb44b, 0x3f1ed3c6, 0x3eab06f4};
uint32_t weights1_shape[] = {2, 8, 5};
uint32_t weights1_values[] = {
0xbe1b9f76, 0x3e77c3f8, 0xbead1b0c, 0x3ecb999a, 0xbe20471c, 0x3e5f481c,
0x3ed6d390, 0xbe5f6ed0, 0xbdea86c4, 0x3e5bbc44, 0xbe9fee66, 0x3eb76f78,
0xbedd358e, 0x3e497f5c, 0x3d527bf8, 0x3dcedbf0, 0xbecc7bce, 0xbe81ffa5,
0x3ecbffd4, 0xbddabb30, 0xbe9aebe8, 0xbe9c4230, 0x3ebc5350, 0x3e177f54,
0xbe811315, 0xbee1e40d, 0xbd93fe10, 0x3ebb7968, 0x3ee4e636, 0xbe807b12,
0xbeb7e0dd, 0xbe94b3e4, 0x3ea3e864, 0x3e6d851c, 0xbeb030cc, 0x3eb4c482,
0x3e9eff90, 0x3eb0cbec, 0x3e8e5746, 0x3e99b77e, 0xbebd9868, 0x3eb1c556,
0x3ed4086e, 0xbe5113e1, 0xbe4a029f, 0xbecfb148, 0xbd891828, 0x3ed8ea94,
0x3e6fec98, 0x3e2270c4, 0x3de585b8, 0xbec9e6b4, 0x3ecebb20, 0xbe53d7b8,
0x3ec72844, 0xbd0ab3d0, 0xbecee7d6, 0xbec12893, 0xbe618c84, 0x3e66f338,
0xbe6741db, 0x3ed5ca40, 0xbe2ccb44, 0xbd203aa8, 0x3d81ac10, 0x3db92198,
0x3e4a7010, 0xbe9d7ac6, 0xbd301208, 0x3ec2d2d6, 0x3e2de8c8, 0x3e479f54,
0x3ed8d474, 0xbeb25d85, 0x3d763d80, 0x3eb61b5a, 0xbec61cd1, 0xbe44c542,
0x3ebee346, 0xbe53df41, 0xbe6f1e1b, 0x3e9e8ea0, 0x3ee46d12, 0xbdd1fa20,
0xbe8f83dc, 0x3b9084c0, 0x3ece54c0, 0x3e915990, 0xbe20b66c, 0x3e036284,
0x3e3d5c4c, 0x3e86944e, 0xbe9e462c, 0x3ebbff92, 0xbe2089f6, 0x3d0f9250,
0x3d757710, 0x3e332b14, 0x3e804fe0, 0x3eb2e8ec, 0x3d190310, 0x3e55de1c,
0x3eace9cc, 0xbe89ca96, 0xbda4ce70, 0x3d64ba10, 0xbe5ec821, 0x3ee4e29a,
0x3ecdd9da, 0x3e3248cc, 0xbeb401c3, 0xbebdbf60, 0x3ec83690, 0x3e5d42c8,
0x3da2aec0, 0x3d0ff4f8, 0xbec4e626, 0x3eb165e2, 0x3e79a49c, 0x3e42d780,
0x3dcaf230, 0x3ee0b168, 0xbdcc9010, 0x3ed7e74a, 0xbe97eae2, 0xbdd20768,
0xbed93dd3, 0x3e917fbc, 0xbdcbff10, 0xbd084a60, 0xbe5de779, 0xbedb8204,
0x3caf1b90, 0xbda475ac, 0x3ebd81c8, 0x3d7ba930, 0x3ee1a07c, 0xbedeee8a,
0x3eb369f6, 0xbe19b22c, 0x3ebc4676, 0xbe90de80, 0xbe872d40, 0x3e662ae4,
0xbed457d0, 0xbe9acddc, 0x3daf9920, 0xbe1c1d3a, 0x3ec2326c, 0x3e3bb9b4,
0x3caa1db0, 0xbd1e1828, 0x3e667240, 0x3e8472c6, 0x3edee626, 0xbe28e040,
0xbdc0f07c, 0xbe942d27, 0xbe0aeb80, 0xbe025ea8, 0x3ea2c8c8, 0x3e3b45e4,
0x3ec26bea, 0x3cc42620, 0xbe436636, 0xbd507b08, 0xbeadd2f6, 0x3c976030,
0xbe8d6097, 0x3e24fec4, 0xbe8480f1, 0x3edb2580, 0xbea1ef22, 0x3cbbc620,
0xbea299d4, 0x3eb0f98a, 0x3eade05a, 0xbe190f96, 0x3e5b9460, 0x3e2a3b28,
0x3e926d0a, 0xbe26eada, 0xbdf89d64, 0x3e6328f4, 0x3ec258fa, 0x3d082a00,
0x3e80b7ea, 0xbdb0b3c4, 0xbed14480, 0x3cf049b0, 0xbe58367b, 0xbe8219a4,
0x3d2f4d98, 0xbda3e468, 0xbe989ba1, 0x3ed26ffe, 0xbd8f889c, 0x3ed3c2e8,
0xbeaa2909, 0xbe24134e, 0x3ed89bd2, 0xbce64df0, 0xbed605fa, 0x3edaf946,
0xbe9d91cb, 0x3dd8c630, 0x3e8fcc58, 0xbeac7e7c, 0xbe8525dc, 0xbec0490a,
0xbedf67a9, 0x3dedf310, 0x3e4679ac, 0x3ebc54aa, 0xbe4e7f38, 0xbe025fa2,
0x3e10ce08, 0xbe879404, 0xbeb62674, 0x3d940df8, 0xbe9bf81e, 0x3d2a1fb8,
0x3d836668, 0xbddc5118, 0x3ed2d41c, 0x3ec8c0ca, 0x3e2abb28, 0x3e122c34,
0x3e791bd4, 0xbe5a9fca, 0xbd97418c, 0xbddf28c4, 0x3d01b298, 0xbe3b1bb2,
0x3e23c650, 0xbed0b705, 0xbe362bda, 0xbe94746f, 0x3ec058f2, 0xbde59ef4};
uint32_t biases1_shape[] = {2, 5};
uint32_t biases1_values[] = {
0xbe5fbd62, 0x3d21ed48, 0x3e84b2c0, 0xbd3641c0, 0x3e0b5e64, 0x3cdf2e90,
0x3eb58a42, 0xbe019774, 0x3e578a54, 0x3ec4c2fc, 0x3e8d90bc, 0xbdfe1e6c,
0xbe2ab878, 0x3de69348, 0x3e226e48, 0xbd6ccb78, 0xbea7780c, 0x3e061770,
0xbea2cdd5, 0xbeb5b12a, 0xbddce6cc, 0x3e208298, 0xbea5ddf2, 0xbe86a497,
0xbe68730d, 0x3e97de7c, 0xbe703894, 0xbd48ccd8, 0xbe101be0, 0xbeb81f6b};
uint32_t hidden_weights1_shape[] = {2, 5, 5};
uint32_t hidden_weights1_values[] = {
0xbe325de8, 0x3d7f2f50, 0x3ee1dbc6, 0xbec39102, 0xbe62d375, 0x3dc59638,
0xbe9933c8, 0x3e83aa94, 0x3e55ae7c, 0xbebc9a53, 0xbeb3c7f8, 0xbeb1870e,
0xbd4e46b0, 0xbe81b1a9, 0x3e6ef9a8, 0x3e11fa20, 0xbe0d48c0, 0x3e20904c,
0x3e5c50f0, 0xbd3aa670, 0x3e75d434, 0x3e4904fc, 0xbee0a324, 0xbea1a3c0,
0x3eb037d8, 0xbd912c2c, 0xbe8d3f0b, 0x3ea0bcaa, 0x3e1f747c, 0x3d6b9ee0,
0xbebdb332, 0x3e935dc2, 0xbea9c5c8, 0xbecccd1f, 0x3ec31294, 0x3e62e8b8,
0x3ed8df8e, 0x3dd289d0, 0x3dd09a78, 0xbe5a7ee4, 0x3e08fc84, 0x3d1ef258,
0x3d851878, 0xbe91286a, 0x3e92b048, 0x3e37f4f4, 0xbed5df49, 0xbe5655f7,
0xbd4613f0, 0xbd68b2e8, 0x3dc7ff90, 0xbede7c01, 0xbe37306c, 0x3e769414,
0x3cc4e590, 0xbeae8cee, 0xbdb9cd1c, 0x3e062014, 0x3ee39938, 0x3e592a78,
0x3e63833c, 0x3e99f81e, 0x3ca711d0, 0x3e675c3c, 0xbeb798f6, 0xbecfe0c1,
0xbed8b7ed, 0xbece783b, 0xbe972362, 0xbe03b7b6, 0xbedc1c4e, 0x3ebe51d8,
0x3ebde4ee, 0x3ebf18f2, 0xbee0d2e5, 0xbdc69f70, 0xbd7bb5b8, 0xbd840080,
0xbd88748c, 0xbe987408, 0x3ea85d6a, 0x3eb1c89c, 0xbda0df98, 0xbc914780,
0xbd4637a8, 0xbeb1737d, 0xbe0d071e, 0x3e1469e4, 0x3e9ccdb4, 0xbcc4c620,
0x3d428f68, 0x3eb8509c, 0x3e33aa40, 0xbdf7d0f0, 0x3e4c5720, 0x3ed75422,
0xbedd7e2d, 0x3eafcf42, 0x3ec8b9ca, 0x3e9c8b2a, 0x3e8db48a, 0x3d737210,
0xbebfde02, 0x3ea17e98, 0x3e109bc4, 0x3e7d66c4, 0x3e630230, 0xbe77c691,
0x3e658114, 0x3e3b6830, 0x3c6b9ce0, 0xbea06a9a, 0x3eca9984, 0x3e878898,
0xbe0751fa, 0xbe0d9648, 0x3ec02c1a, 0x3ed16702, 0x3ec1d528, 0x3e1940d8,
0xbdc724ec, 0xbd472b98, 0xbed417e7, 0xbe76cf7f, 0xbe03c1e6, 0xbe597ddb,
0x3cb6fdc0, 0xbea9a47a, 0x3ed6ece6, 0x3eb5c09c, 0xbec763d2, 0x3df84f58,
0xbd92bdd4, 0xbeb76e74, 0xbdcf25dc, 0xbed21657, 0x3e9ba3fc, 0xbe877dfe,
0x3e8a9360, 0x3d26cb70, 0x3edf9e2a, 0x3ec36c40, 0xbe82d308, 0xbe6d6e1d,
0xbea00f51, 0xbde46c64, 0x3eb9b38a, 0x3dd941a0, 0xbdb26478, 0x3ec2e956};
uint32_t hidden_biases1_shape[] = {2, 5};
uint32_t hidden_biases1_values[] = {
0x3edfa1a6, 0xbeacb04b, 0xbeb99f2f, 0xbead8f69, 0xbb0d5900, 0xbe46414a,
0x3eacd6bc, 0xbe3e8e36, 0xbe9f0e96, 0x3d8d0aa8, 0x3ed26dd4, 0xbdf673cc,
0xbaa00c00, 0xbe5ddf86, 0x3ee494f2, 0x3941c000, 0x3eac49a0, 0x3ec0e9e4,
0x3d2ae830, 0x3dd00540, 0xbde97700, 0xbe95df9f, 0xbe2440b2, 0x3dad0a60,
0xbe6f45de, 0x3e893e48, 0xbece70c1, 0x3ecefc06, 0x3e24bcd8, 0xbea06bf2};
uint32_t all_ts_out1_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0x3ebd7308, 0x3f64bfca, 0x3e96c38c, 0x3f3d08b3, 0x3f4ed9a0, 0x3f4cbe71,
0x3f4eb04c, 0x3e85fce6, 0x3e97fd95, 0x3f8901b2, 0x3f2f67f8, 0xbedb2fa7,
0xbefa32c3, 0x3ec8b1e1, 0xbed037d0, 0x3f4ba138, 0xbeabf370, 0xbf4120d0,
0x3ecca2bb, 0xbf2c7ea2, 0x3ee040ab, 0x3eedbe60, 0x3e6717ff, 0x3ec905c6,
0x3ed8b348, 0x3f4ef281, 0x3ed71f06, 0x3dc962ab, 0x3d91f336, 0x3f248a02,
0x3f2dcf90, 0xbe9f2068, 0xbedd7c39, 0x3ead94a9, 0xbe50a7d2, 0x3f4577ae,
0xbe58e036, 0xbf377f08, 0x3eb83368, 0xbf0e3834, 0x3ef216aa, 0x3e5e4d08,
0x3e745018, 0x3e62837b, 0x3e11ab71, 0x3f4573d1, 0x3e0c25a8, 0x3debf382,
0xbdd8e72a, 0x3e7fc71f, 0x3f2cd1a8, 0xbe07a588, 0xbea8172b, 0x3ea0fd5a,
0x3d6961d8, 0x3f3b7bd9, 0xbc7f2780, 0xbf166c22, 0x3ed1edf5, 0xbed9aa68,
0x3f0c0d67, 0x3e330cab, 0x3e2f6954, 0x3e448f94, 0x3dfbfbd1, 0x3f4a1f6a,
0x3d3c12cf, 0x3dfc4ab2, 0xbe278de1, 0x3e04a44b, 0x3f2753d8, 0x3e1a2b60,
0xbee04b4a, 0x3e6e3232, 0x3ee33cb4, 0x3f25b2fe, 0x3e751f72, 0xbf051b4b,
0x3ed8a638, 0xbe8dcab2, 0x3f1f2826, 0x3e559c26, 0x3dbbfaaa, 0x3e4feb0b,
0x3e147845, 0x3f4b713c, 0x3c42a99e, 0x3de07552, 0xbe9fcdb7, 0x3d18e8b6,
0x3f2e37ae, 0x3eea935e, 0xbe97bb49, 0x3eefdd22, 0x3f84dd69, 0x3f3c21fd,
0x3f0577d9, 0xbceedc60, 0x3f1a0c27, 0xbd85eeb0};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, weights1_shape,
(void *)weights1_values, biases1_shape, (void *)biases1_values,
hidden_weights1_shape, (void *)hidden_weights1_values,
hidden_biases1_shape, (void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
void gru_bidir_to_fwd() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 5, 4
bool is_layer_bidir[] = {true, false};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {2, 2, 5};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4,
0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0,
0x3f504bc2, 0x3fe33d36, 0x3fd8b70c, 0x3fc21f69};
uint32_t weights0_shape[] = {2, 4, 5};
uint32_t weights0_values[] = {
0xbed56486, 0x3dab6e00, 0x3e301b34, 0x3ea3ea60, 0x3e64a8e0, 0x3ecb70ec,
0xbd9a4a9c, 0xbe879f2a, 0x3e2b3b3c, 0x3dbfb2a0, 0x3eae1a26, 0xbd96b870,
0x3e27e118, 0xbee29f7d, 0xbeb29e53, 0xbee46847, 0xbe51a0d6, 0x3e67965c,
0xbe9488c8, 0xbe83d8ea, 0xbedd7037, 0xbd9bb79c, 0xbe05dde4, 0x3e4eb6e0,
0x3e83a64e, 0x3ea58b16, 0xbe71302b, 0xbe87f0ac, 0xbe5bef69, 0x3e72843c,
0xbebd57dc, 0x3d721bf0, 0xbe295dd2, 0xbe98cd9b, 0x3eafbdf8, 0x3bc9f900,
0x3ebd57a4, 0xbee2f252, 0xbe83e971, 0x3e261b9c, 0x3e4f3564, 0x3e7b6660,
0x3e8e0cba, 0x3e33fa44, 0x3db646b0, 0x3e382b04, 0xbd673410, 0x3edbdbde,
0x3ebdb73a, 0xbec71c7c, 0xbe87a208, 0x3c8be180, 0xbeb073c8, 0x3ec7411a,
0x3d2882b8, 0x3e0a5954, 0x3dd43780, 0xbe27d2d8, 0x3eca0944, 0xbe8f3f38,
0x3e62db50, 0xbd87a1dc, 0x3ecd16fc, 0xbea1dc41, 0x3eabc840, 0xbea317e4,
0x3ba04b00, 0x3e8c10dc, 0xbe4216de, 0xbe136b9a, 0xbe01fea6, 0x3ebcdbe4,
0xbe90c29a, 0x3e06b148, 0x3a3bae00, 0xbe19a60a, 0x3d64ada0, 0x3e4d6418,
0x3e1735dc, 0x3e6bc304, 0x3ed76812, 0xbeda1e9d, 0xbcc9dc90, 0xbe8b56d8,
0xbde3f398, 0x3e9a494e, 0xbc03b300, 0x3d898450, 0x3ecfc37a, 0x3ca54f60,
0xbe47ad20, 0xbeac6e30, 0xbe3b8b06, 0x3e9cea58, 0x3d85a140, 0xbde68434,
0xbeb09ec1, 0x3e87de1e, 0xbec116de, 0x3dd939f0, 0xbe18ef8e, 0x3df90638,
0x3dc5e080, 0xbe94b2a7, 0xbe9a126e, 0xbdfff1d8, 0xbe58ace5, 0xbe807ef0,
0xbd506310, 0xbeb9158a, 0x3e074a8c, 0xbe574539, 0xbecc25a9, 0x3e0332e4,
0x3eb682ba, 0x3ebc395c, 0xbed87cba, 0x3ec8c806, 0x3c696e40, 0x3d952498};
uint32_t biases0_shape[] = {2, 5};
uint32_t biases0_values[] = {
0x3c9a1ea0, 0x3e9399fc, 0xbead15e7, 0xbe9d3169, 0xbe84f4be, 0x3ed6d390,
0x3eb76f78, 0xbecc7bce, 0xbe9c4230, 0xbd93fe10, 0xbe7c17a9, 0xbe856c8e,
0x3e1c8044, 0xbe8872c0, 0xbc1fdba0, 0xbe9aebe8, 0xbee1e40d, 0xbeb7e0dd,
0x3eb4c482, 0x3e77c3f8, 0x3ea04420, 0xbe249da6, 0x3ec122ee, 0x3e2598d4,
0x3d67f3b0, 0xbe94b3e4, 0x3e9eff90, 0xbead1b0c, 0xbe5f6ed0, 0xbedd358e};
uint32_t hidden_weights0_shape[] = {2, 5, 5};
uint32_t hidden_weights0_values[] = {
0xbe591a24, 0xbed64902, 0xbedcd447, 0xbdb06a40, 0x3bbd8300, 0x3e9be8be,
0xbec14162, 0x3e8ed458, 0xbdb3d438, 0xbe5008a0, 0xbb3dfe00, 0xbdb9c6e0,
0xbeb32c7f, 0xbecd7820, 0x3e2c8218, 0xbe639ee9, 0x3e7b2408, 0xbdc1a118,
0xbec5b44b, 0xbece16e2, 0xbeaf7709, 0x3e7795b4, 0xbe39af54, 0xbd8f518c,
0xbcf73e90, 0xbebbb5a4, 0x3e987cda, 0x3d190310, 0x3ece54c0, 0xbebdbf60,
0xbd8d3050, 0xbe6f1e1b, 0x3d64ba10, 0x3e86944e, 0xbec4e626, 0x3e1f0c64,
0x3b9084c0, 0xbeb401c3, 0x3d757710, 0x3ee46d12, 0xbe271822, 0x3e3d5c4c,
0x3d0ff4f8, 0x3e55de1c, 0x3e915990, 0x3e2dd3f4, 0x3d0f9250, 0x3e9e8ea0,
0xbe5ec821, 0xbe9e462c, 0x3e33b614, 0xbe87b6cc, 0xbdc2d30c, 0xbd3c4ee0,
0x3ed8e4e6, 0x3cdb72e0, 0xbde54684, 0x3dc3c730, 0x3c4ba340, 0x3e916930,
0xbe5f7204, 0xbe5f126c, 0xbe952b16, 0xbd1e06b8, 0x3ed963f2, 0x3e58b204,
0xbe20347e, 0xbcbc0320, 0x3db95c18, 0xbd047a58, 0xbedba477, 0xbebbabe9,
0x3ea3e928, 0x3e91971e, 0xbecdb113, 0x3ebdf640, 0xbe8b7474, 0xbe8d2560,
0xbe5cf1d4, 0xbeaa02a3, 0xbe1985b8, 0x3e1ae0c4, 0x3e9e5868, 0x3da48928,
0x3d987eb0, 0x3e06e404, 0x3e10c984, 0xbeaa740b, 0xbe6de235, 0x3e430d88,
0xbdd1716c, 0xbc172600, 0x3e3b9ae0, 0x3dd306b8, 0x3e354500, 0xbca2ec60,
0xbdcdfc84, 0xbe19fc78, 0x3db3dd28, 0xbd669538, 0x3ec95d16, 0x3e90def0,
0x3d448f50, 0x3e21ba5c, 0x3eb5ccc2, 0xbe3acaf4, 0xbd5360c8, 0xbdbfc268,
0xbddc3fcc, 0x3e9cab54, 0x3ecec37e, 0xbe4c6e38, 0x3ea38344, 0xbdc09e0c,
0xbea1834a, 0x3e25d8dc, 0x3eb7196c, 0x3dffaaa8, 0xbebcb154, 0x3ca36520,
0xbecea3b7, 0xbeb90843, 0x3e5c0bbc, 0x3e7822c0, 0xbea16940, 0x3e332b14,
0xbdd1fa20, 0x3ecdd9da, 0xbe2089f6, 0x3e42d780, 0x3eace9cc, 0xbe20b66c,
0x3e5d42c8, 0x3eb2e8ec, 0xbe1b9f76, 0x3ee4e29a, 0x3ebbff92, 0x3e79a49c,
0xbda4ce70, 0x3e5f481c, 0x3ec83690, 0x3e804fe0, 0xbe8f83dc, 0x3e3248cc,
0xbe9fee66, 0x3eb165e2, 0xbe89ca96, 0x3e036284, 0x3da2aec0, 0x3dcedbf0};
uint32_t hidden_biases0_shape[] = {2, 5};
uint32_t hidden_biases0_values[] = {
0x3d69b920, 0xbeb31aa8, 0xbe700636, 0x3eb8e4f2, 0xbeb9d294, 0x3ecb999a,
0xbdea86c4, 0x3e497f5c, 0x3ecbffd4, 0x3e177f54, 0xbb5a2c80, 0xbdb50e24,
0x3d65d3e0, 0xbdd123b0, 0xbb4def80, 0xbe81ffa5, 0x3ebc5350, 0x3ebb7968,
0x3ea3e864, 0x3eb0cbec, 0xbd0388a0, 0x3ee28262, 0x3e256d64, 0x3c4f7420,
0xbec66e3b, 0x3ee4e636, 0x3e6d851c, 0x3e8e5746, 0xbe20471c, 0x3e5bbc44};
uint32_t all_ts_out0_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out0_exp_values[] = {
0x3ef5b84b, 0xbdc926d0, 0x3f497c28, 0xbdfc36e0, 0x3e607fa7, 0x3f82f3c5,
0xbe8cee08, 0x3f14bc02, 0x3f188fa6, 0x3e241ab6, 0x3eac5206, 0xbedcec07,
0xbe3aac82, 0xbefbfe4a, 0x3d0ad0f8, 0xbda044a0, 0xbbf714b4, 0xbee36d2e,
0xbee0ad51, 0xbe2f3894, 0x3f13e1fa, 0xbf189b8e, 0x3f2c5fec, 0x3dc15418,
0xbc762fd0, 0x3f76bfe3, 0xbf2b3644, 0x3f21d8d1, 0x3ede3af5, 0xbc315408,
0x3e8d057a, 0xbeb31b94, 0xbe3ae016, 0xbefdfa09, 0xbc714a80, 0x3e15b5fa,
0xbd251f38, 0xbec45527, 0xbe918512, 0xbb884180, 0x3ef72c4b, 0xbf41095a,
0x3f193819, 0x3e22c8d5, 0xbc6dc4e6, 0x3f42b484, 0xbf1bf022, 0x3eedea11,
0x3eb9245f, 0xbdfb31b3, 0x3ecf949d, 0xbea0e868, 0xbde5266b, 0xbecc0f8c,
0x3d2635b8, 0x3ed43076, 0x3e94113c, 0xbe8f5d00, 0x3e26030d, 0x3e5ae9d3,
0x3f3f74e5, 0xbf47bebc, 0x3f40cc76, 0xbe345ea9, 0xbd596022, 0x3f5486e7,
0xbf33d933, 0x3edef9ca, 0x3ee99606, 0xbe2fe4f9, 0x3ec22476, 0xbe846bbc,
0x3ec65d70, 0xbebc0ca3, 0xbd2b4ab0, 0x3f239afa, 0x3ee52450, 0xbdbb9c94,
0x3f011b6e, 0x3ef79e50, 0x3f51084e, 0xbf65575e, 0x3f388490, 0x3debd8da,
0xbcaa7ed3, 0x3f58d051, 0xbf2f19e6, 0x3f027aa7, 0x3d9a45a2, 0xbe5edb76,
0x3edce200, 0xbe25588a, 0xbde12298, 0xbce7e6a0, 0x3e6c86cc, 0x3f932f62,
0x3f32daf3, 0x3f4d9d22, 0x3f9152b4, 0x3f61d3b8};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 = test_layer(
input0, h00_shape, (void *)h00_values, weights0_shape,
(void *)weights0_values, biases0_shape, (void *)biases0_values,
hidden_weights0_shape, (void *)hidden_weights0_values,
hidden_biases0_shape, (void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {1, 2, 4};
uint32_t h01_values[] = {0x3f62957c, 0x3f9e9191, 0x3fc827a5, 0x3fc7d2ab,
0x3fe27e59, 0x3ea84764, 0x3e9400d4, 0x3f8e916a};
uint32_t weights1_shape[] = {1, 10, 4};
uint32_t weights1_values[] = {
0x3e7e5dec, 0x3cbaca20, 0xbecd44ac, 0xbec67b5a, 0xbee86a24, 0xbee6d710,
0xbe1343c8, 0xbe1df5e4, 0xbef61752, 0x3ed44ee8, 0xbefb5c54, 0x3e60bf20,
0xbef8bec0, 0x3e89d76e, 0xbe476b90, 0x3efc847a, 0xbdcfbb68, 0x3efe7680,
0x3ddce8a0, 0x3e93351a, 0x3eac2490, 0x3e815596, 0xbec9005c, 0xbd669fb0,
0xbef24c72, 0xbea8fa48, 0x3e233510, 0x3e338400, 0x3ed4c8ae, 0x3ed5a748,
0x3e896c2c, 0xbefb26f0, 0xbe4ccfcc, 0x3cdc2320, 0x3d8ea718, 0xbedaa662,
0x3e15f4e8, 0x3e72cc80, 0xbeab490c, 0x3e6ee73c, 0xbe9424c4, 0xbe3a9e8c,
0x3d43feb0, 0xbe5a7688, 0x3ec5dd22, 0x3e8fe95c, 0x3eecc1a2, 0x3e387860,
0x3ea3b58c, 0xbe9174d6, 0x3cdb4d20, 0xbeb5cb18, 0x3d183c70, 0xbda079b8,
0xbe9e108c, 0x3e3e52fc, 0xbe71bbb8, 0x3ed95eb2, 0x3cd1f000, 0x3ed94986,
0x3eeb46b8, 0x3ca93e40, 0x3e757f58, 0x3d065330, 0x3e5160ac, 0xbeb50c40,
0x3e7df8fc, 0xbeaa9ef0, 0xbec2575a, 0xbe2b2094, 0xbee9f7e0, 0xbe377120,
0x3ef50362, 0xbe0afadc, 0xbdb73cb0, 0x3ddf9ad8, 0x3ec26652, 0xbdc58f20,
0xbebe3eb8, 0xbec32746, 0xbe910096, 0x3c83b620, 0x3ee2836a, 0xbe174ae8,
0x3e76522c, 0xbe1e4c94, 0x3eea1e74, 0x3e2b57a8, 0xbeb4b7f8, 0xbddea668,
0xbeed20aa, 0xbe134f2c, 0xbe7d9964, 0x3d881718, 0x3eb48e6e, 0x3e9e4660,
0xbed2dd48, 0x3e7dcda4, 0x3e804bf8, 0xbe0e0e88, 0x3e85975a, 0xbeb359dc,
0x3e9787f8, 0xbe3edf14, 0xbd50ae60, 0x3ed6daea, 0x3ed8b624, 0x3e00e540,
0x3ec50494, 0xbd5eade0, 0xbe89f8a6, 0x3e359a68, 0x3e9e6e68, 0xbed6839a,
0x3e21ad5c, 0xbe7a2610, 0x3e8da7dc, 0xbe8a82c4, 0x3e518704, 0x3d350a30};
uint32_t biases1_shape[] = {1, 4};
uint32_t biases1_values[] = {0x3ed91e6a, 0x3e9414ee, 0xbe1b5134, 0xbea9d954,
0xbde41328, 0xbdb7df18, 0x3ec89742, 0x3e80aae6,
0xbd1860a0, 0x3ed3e00e, 0xbe2bd65c, 0xbeed681e};
uint32_t hidden_weights1_shape[] = {1, 4, 4};
uint32_t hidden_weights1_values[] = {
0xbec20bd4, 0xbe37a3e0, 0xbdead1c8, 0xbead1388, 0xbb1e0800, 0x3dc17718,
0xbe7818b0, 0x3cbe3200, 0xbe0282c4, 0xbe85c1f4, 0x3d8caec8, 0xbe3ccf20,
0xbea7904e, 0x3de2e688, 0x3ed27f84, 0x3efb36e4, 0x3e945c74, 0x3e3374b0,
0x3eeb444c, 0x3eff8ff0, 0xbd4bc4f0, 0xbeb971e6, 0xbe09c564, 0x3efa070e,
0x3e1bd1a8, 0xbe96890e, 0xbab2f000, 0xbec11260, 0xbdf6f9b8, 0xbe81f174,
0xbe780fcc, 0xbecf8810, 0xbef2e226, 0x3dc45320, 0x3ea2ac3c, 0xbe2e8a7c,
0xbef56ad2, 0xbd30c140, 0x3cc3c6c0, 0x3e80d2ca, 0x3efc4230, 0xbdd7b678,
0xbef93ece, 0xbea5aa8e, 0xbea1f7f8, 0xbde4b548, 0xbe9721d8, 0x3ef1632e};
uint32_t hidden_biases1_shape[] = {1, 4};
uint32_t hidden_biases1_values[] = {
0xbee83510, 0x3e004e90, 0xbd1b12f0, 0xbe8146d8, 0x3e51e224, 0x3ef9356c,
0xbe11c200, 0xbed3f95a, 0x3dcefba8, 0x3e426fc0, 0x3ecb9a06, 0x3ec6c0fc};
uint32_t all_ts_out1_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out1_exp_values[] = {
0xbe93b296, 0x3f6c52a6, 0x3df2ed90, 0x3ef58c75, 0xbedbef50, 0x3eed420e,
0x3ec9725f, 0x3ec154ec, 0xbeeff3f1, 0x3f4f798a, 0xbe9a656d, 0x3d7b8358,
0xbf152f9e, 0x3ef51830, 0x3cccae54, 0xbd1913d8, 0xbf05fe08, 0x3f3bc79e,
0xbeb73f4b, 0xbe1f8736, 0xbefd88cc, 0x3ee483a5, 0x3e477805, 0xbe95932c,
0xbf043d99, 0x3f2ccf54, 0xbefa4aaa, 0xbebf8337, 0xbe99bc9a, 0x3ebeeba4,
0x3ec71822, 0xbef39d98, 0xbf09c6e7, 0x3f1c7ff1, 0xbe53e20e, 0xbef9fb1e,
0x3cfaaa80, 0x3e6a580a, 0x3ee7dd5b, 0xbf2cbac5};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, weights1_shape,
(void *)weights1_values, biases1_shape, (void *)biases1_values,
hidden_weights1_shape, (void *)hidden_weights1_values,
hidden_biases1_shape, (void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_fwd_to_fwd);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_fwd_to_bidir);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_bidir_to_bidir);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_bidir_to_fwd);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_gru_rnn.c 0000664 0000000 0000000 00000074763 15000221702 0020556 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_rnn.h"
/******************************************************************************
default_input
******************************************************************************/
uint32_t default_input_shape[] = {5, 2, 4};
/* Visualization of values in shape (timestep, batch, feature) order
[
[ # timestep_0
[.000, .001, .002, .003], # batch_0
[.010, .011, .012, .013], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_1
[.100, .101, .102, .103], # batch_0
[.110, .111, .112, .113], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_2
[.200, .201, .202, .203], # batch_0
[.210, .211, .212, .213], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_3
[.300, .301, .302, .303], # batch_0
[.310, .311, .312, .313], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_4
[.400, .401, .402, .403], # batch_0
[.410, .411, .412, .413], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
]
*/
float default_input_values[] = {
0.0, 0.001, 0.002, 0.003, 0.01, 0.011, 0.012, 0.013, 0.1, 0.101,
0.102, 0.103, 0.11, 0.111, 0.112, 0.113, 0.2, 0.201, 0.202, 0.203,
0.21, 0.211, 0.212, 0.213, 0.3, 0.301, 0.302, 0.303, 0.31, 0.311,
0.312, 0.313, 0.4, 0.401, 0.402, 0.403, 0.41, 0.411, 0.412, 0.413};
/******************************************************************************
default_uni_h0
******************************************************************************/
uint32_t default_uni_h0_shape[] = {1, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_uni_h0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_uni_input_weights
******************************************************************************/
uint32_t default_uni_input_weights_shape[] = {1, 4, 3};
/* Visualization of z concatenation values in shape order
[[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]]
*/
float default_uni_input_weights_z_values[] = {
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131};
/* Visualization of r concatenation values in shape order
[[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]]
*/
float default_uni_input_weights_r_values[] = {
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605};
/* Visualization of h concatenation values in shape order
[[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]]
*/
float default_uni_input_weights_h_values[] = {
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076};
/******************************************************************************
default_uni_input_biases
******************************************************************************/
uint32_t default_uni_input_biases_shape[] = {1, 3};
/* Visualization of z concatenation values in shape order
[[0.0643551 0.2632221 0.4282453]]
*/
float default_uni_input_biases_z_values[] = {0.0643551, 0.2632221, 0.4282453};
/* Visualization of r concatenation values in shape order
[[-0.1866051 -0.392639 0.4665768]]
*/
float default_uni_input_biases_r_values[] = {-0.1866051, -0.392639, 0.4665768};
/* Visualization of h concatenation values in shape order
[[-0.3741214 0.4407408 -0.2892259]]
*/
float default_uni_input_biases_h_values[] = {-0.3741214, 0.4407408, -0.2892259};
/******************************************************************************
default_uni_hidden_weights
******************************************************************************/
uint32_t default_uni_hidden_weights_shape[] = {1, 3, 3};
/* Visualization of z concatenation values in shape order
[[[ 0.4629621 0.4114995 -0.049397 ]
[ 0.4833339 -0.1453276 -0.1190602]
[ 0.113032 0.4688771 -0.2869941]]]
*/
float default_uni_hidden_weights_z_values[] = {
0.4629621, 0.4114995, -0.049397, 0.4833339, -0.1453276,
-0.1190602, 0.113032, 0.4688771, -0.2869941};
/* Visualization of r concatenation values in shape order
[[[ 0.5423677 0.5621256 -0.5199673]
[-0.5070595 0.0945408 0.2686667]
[-0.0710383 -0.1628114 0.4383084]]]
*/
float default_uni_hidden_weights_r_values[] = {
0.5423677, 0.5621256, -0.5199673, -0.5070595, 0.0945408,
0.2686667, -0.0710383, -0.1628114, 0.4383084};
/* Visualization of h concatenation values in shape order
[[[ 0.3073992 -0.3689663 -0.3204532]
[ 0.233599 -0.3069769 -0.3292732]
[ 0.3672419 0.5463605 -0.1544762]]]
*/
float default_uni_hidden_weights_h_values[] = {
0.3073992, -0.3689663, -0.3204532, 0.233599, -0.3069769,
-0.3292732, 0.3672419, 0.5463605, -0.1544762};
/******************************************************************************
default_uni_hidden_biases
******************************************************************************/
uint32_t default_uni_hidden_biases_shape[] = {1, 3};
/* Visualization of z concatenation values in shape order
[[0.5068286 0.3320496 0.5366269]]
*/
float default_uni_hidden_biases_z_values[] = {0.5068286, 0.3320496, 0.5366269};
/* Visualization of r concatenation values in shape order
[[-0.0919193 0.4369227 0.5323023]]
*/
float default_uni_hidden_biases_r_values[] = {-0.0919193, 0.4369227, 0.5323023};
/* Visualization of h concatenation values in shape order
[[-0.2080224 -0.0367477 -0.1974721]]
*/
float default_uni_hidden_biases_h_values[] = {-0.2080224, -0.0367477,
-0.1974721};
/******************************************************************************
default_bidir_h0
******************************************************************************/
uint32_t default_bidir_h0_shape[] = {2, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]
[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_bidir_h0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_bidir_input_weights
******************************************************************************/
uint32_t default_bidir_input_weights_shape[] = {2, 4, 3};
/* Visualization of z concatenation values in shape order
[[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]
[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]]
*/
float default_bidir_input_weights_z_values[] = {
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131,
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131};
/* Visualization of r concatenation values in shape order
[[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]
[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]]
*/
float default_bidir_input_weights_r_values[] = {
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605,
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605};
/* Visualization of h concatenation values in shape order
[[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]
[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]]
*/
float default_bidir_input_weights_h_values[] = {
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076,
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076};
/******************************************************************************
default_bidir_input_biases
******************************************************************************/
uint32_t default_bidir_input_biases_shape[] = {2, 3};
/* Visualization of z concatenation values in shape order
[[0.0643551 0.2632221 0.4282453]
[0.0643551 0.2632221 0.4282453]]
*/
float default_bidir_input_biases_z_values[] = {0.0643551, 0.2632221, 0.4282453,
0.0643551, 0.2632221, 0.4282453};
/* Visualization of r concatenation values in shape order
[[-0.1866051 -0.392639 0.4665768]
[-0.1866051 -0.392639 0.4665768]]
*/
float default_bidir_input_biases_r_values[] = {
-0.1866051, -0.392639, 0.4665768, -0.1866051, -0.392639, 0.4665768};
/* Visualization of h concatenation values in shape order
[[-0.3741214 0.4407408 -0.2892259]
[-0.3741214 0.4407408 -0.2892259]]
*/
float default_bidir_input_biases_h_values[] = {
-0.3741214, 0.4407408, -0.2892259, -0.3741214, 0.4407408, -0.2892259};
/******************************************************************************
default_bidir_hidden_weights
******************************************************************************/
uint32_t default_bidir_hidden_weights_shape[] = {2, 3, 3};
/* Visualization of z concatenation values in shape order
[[[ 0.4629621 0.4114995 -0.049397 ]
[ 0.4833339 -0.1453276 -0.1190602]
[ 0.113032 0.4688771 -0.2869941]]
[[ 0.4629621 0.4114995 -0.049397 ]
[ 0.4833339 -0.1453276 -0.1190602]
[ 0.113032 0.4688771 -0.2869941]]]
*/
float default_bidir_hidden_weights_z_values[] = {
0.4629621, 0.4114995, -0.049397, 0.4833339, -0.1453276, -0.1190602,
0.113032, 0.4688771, -0.2869941, 0.4629621, 0.4114995, -0.049397,
0.4833339, -0.1453276, -0.1190602, 0.113032, 0.4688771, -0.2869941};
/* Visualization of r concatenation values in shape order
[[[ 0.5423677 0.5621256 -0.5199673]
[-0.5070595 0.0945408 0.2686667]
[-0.0710383 -0.1628114 0.4383084]]
[[ 0.5423677 0.5621256 -0.5199673]
[-0.5070595 0.0945408 0.2686667]
[-0.0710383 -0.1628114 0.4383084]]]
*/
float default_bidir_hidden_weights_r_values[] = {
0.5423677, 0.5621256, -0.5199673, -0.5070595, 0.0945408, 0.2686667,
-0.0710383, -0.1628114, 0.4383084, 0.5423677, 0.5621256, -0.5199673,
-0.5070595, 0.0945408, 0.2686667, -0.0710383, -0.1628114, 0.4383084};
/* Visualization of h concatenation values in shape order
[[[ 0.3073992 -0.3689663 -0.3204532]
[ 0.233599 -0.3069769 -0.3292732]
[ 0.3672419 0.5463605 -0.1544762]]
[[ 0.3073992 -0.3689663 -0.3204532]
[ 0.233599 -0.3069769 -0.3292732]
[ 0.3672419 0.5463605 -0.1544762]]]
*/
float default_bidir_hidden_weights_h_values[] = {
0.3073992, -0.3689663, -0.3204532, 0.233599, -0.3069769, -0.3292732,
0.3672419, 0.5463605, -0.1544762, 0.3073992, -0.3689663, -0.3204532,
0.233599, -0.3069769, -0.3292732, 0.3672419, 0.5463605, -0.1544762};
/******************************************************************************
default_bidir_hidden_biases
******************************************************************************/
uint32_t default_bidir_hidden_biases_shape[] = {2, 3};
/* Visualization of z concatenation values in shape order
[[0.5068286 0.3320496 0.5366269]
[0.5068286 0.3320496 0.5366269]]
*/
float default_bidir_hidden_biases_z_values[] = {
0.5068286, 0.3320496, 0.5366269, 0.5068286, 0.3320496, 0.5366269};
/* Visualization of r concatenation values in shape order
[[-0.0919193 0.4369227 0.5323023]
[-0.0919193 0.4369227 0.5323023]]
*/
float default_bidir_hidden_biases_r_values[] = {
-0.0919193, 0.4369227, 0.5323023, -0.0919193, 0.4369227, 0.5323023};
/* Visualization of h concatenation values in shape order
[[-0.2080224 -0.0367477 -0.1974721]
[-0.2080224 -0.0367477 -0.1974721]]
*/
float default_bidir_hidden_biases_h_values[] = {
-0.2080224, -0.0367477, -0.1974721, -0.2080224, -0.0367477, -0.1974721};
/******************************************************************************
default_fwd_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_fwd_hn_out_all_ts_shape[] = {5, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.1562103 0.1410986 -0.1123356]
[-0.1553763 0.1372994 -0.1123919]]
[[-0.253498 0.1940096 -0.1891814]
[-0.2523776 0.1878957 -0.1889893]]
[[-0.3126792 0.1866586 -0.2388406]
[-0.3114854 0.179318 -0.2382826]]
[[-0.3473134 0.1435677 -0.2676416]
[-0.3461194 0.1356744 -0.2667077]]
[[-0.3660706 0.0814286 -0.2807784]
[-0.3648955 0.0733736 -0.2795098]]]
*/
float default_fwd_exp_hn_out_all_ts_values[] = {
-0.1562103, 0.1410986, -0.1123356, -0.1553763, 0.1372994, -0.1123919,
-0.253498, 0.1940096, -0.1891814, -0.2523776, 0.1878957, -0.1889893,
-0.3126792, 0.1866586, -0.2388406, -0.3114854, 0.179318, -0.2382826,
-0.3473134, 0.1435677, -0.2676416, -0.3461194, 0.1356744, -0.2667077,
-0.3660706, 0.0814286, -0.2807784, -0.3648955, 0.0733736, -0.2795098};
/******************************************************************************
default_fwd_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_fwd_hn_out_final_ts_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.3660706 0.0814286 -0.2807784]
[-0.3648955 0.0733736 -0.2795098]]]
*/
float default_fwd_exp_hn_out_final_ts_values[] = {
-0.3660706, 0.0814286, -0.2807784, -0.3648955, 0.0733736, -0.2795098};
/******************************************************************************
default_bwd_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_bwd_hn_out_all_ts_shape[] = {5, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.4037485 0.2564563 -0.2790346]
[-0.4026485 0.2477951 -0.2778324]]
[[-0.3612258 0.1689991 -0.2550354]
[-0.3600727 0.1606691 -0.2541449]]
[[-0.3028114 0.0906047 -0.224893 ]
[-0.3015861 0.083261 -0.2243577]]
[[-0.223746 0.0309375 -0.1819546]
[-0.2225393 0.025346 -0.1817581]]
[[-0.1217477 -0.0007261 -0.1141484]
[-0.1208584 -0.0038126 -0.1141814]]]
*/
float default_bwd_exp_hn_out_all_ts_values[] = {
-0.4037485, 0.2564563, -0.2790346, -0.4026485, 0.2477951, -0.2778324,
-0.3612258, 0.1689991, -0.2550354, -0.3600727, 0.1606691, -0.2541449,
-0.3028114, 0.0906047, -0.224893, -0.3015861, 0.083261, -0.2243577,
-0.223746, 0.0309375, -0.1819546, -0.2225393, 0.025346, -0.1817581,
-0.1217477, -0.0007261, -0.1141484, -0.1208584, -0.0038126, -0.1141814};
/******************************************************************************
default_bwd_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_bwd_hn_out_final_ts_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.4037485 0.2564563 -0.2790346]
[-0.4026485 0.2477951 -0.2778324]]]
*/
float default_bwd_exp_hn_out_final_ts_values[] = {
-0.4037485, 0.2564563, -0.2790346, -0.4026485, 0.2477951, -0.2778324};
/******************************************************************************
default_bidir_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_bidir_hn_out_all_ts_shape[] = {5, 2, 2, 3};
/* Visualization of values in shape order
[[[-0.1562103 0.1410986 -0.1123356 -0.1553763 0.1372994 -0.1123919]
[-0.4037485 0.2564563 -0.2790346 -0.4026485 0.2477951 -0.2778324]]
[[-0.253498 0.1940096 -0.1891814 -0.2523776 0.1878956 -0.1889893]
[-0.3612258 0.1689991 -0.2550354 -0.3600727 0.1606691 -0.2541449]]
[[-0.3126791 0.1866586 -0.2388406 -0.3114854 0.179318 -0.2382826]
[-0.3028114 0.0906047 -0.2248929 -0.3015861 0.083261 -0.2243577]]
[[-0.3473134 0.1435677 -0.2676416 -0.3461194 0.1356744 -0.2667077]
[-0.223746 0.0309375 -0.1819546 -0.2225393 0.025346 -0.1817581]]
[[-0.3660705 0.0814286 -0.2807783 -0.3648955 0.0733736 -0.2795098]
[-0.1217477 -0.0007261 -0.1141484 -0.1208584 -0.0038126 -0.1141814]]]
*/
float default_bidir_exp_hn_out_all_ts_values[] = {
-0.1562103, 0.1410986, -0.1123356, -0.1553763, 0.1372994, -0.1123919,
-0.4037485, 0.2564563, -0.2790346, -0.4026485, 0.2477951, -0.2778324,
-0.253498, 0.1940096, -0.1891814, -0.2523776, 0.1878956, -0.1889893,
-0.3612258, 0.1689991, -0.2550354, -0.3600727, 0.1606691, -0.2541449,
-0.3126791, 0.1866586, -0.2388406, -0.3114854, 0.179318, -0.2382826,
-0.3028114, 0.0906047, -0.2248929, -0.3015861, 0.083261, -0.2243577,
-0.3473134, 0.1435677, -0.2676416, -0.3461194, 0.1356744, -0.2667077,
-0.223746, 0.0309375, -0.1819546, -0.2225393, 0.025346, -0.1817581,
-0.3660705, 0.0814286, -0.2807783, -0.3648955, 0.0733736, -0.2795098,
-0.1217477, -0.0007261, -0.1141484, -0.1208584, -0.0038126, -0.1141814};
/******************************************************************************
default_bidir_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_bidir_hn_out_final_ts_shape[] = {1, 2, 2, 3};
/* Visualization of values in shape order
[[[-0.3660705 0.0814286 -0.2807783 -0.3648955 0.0733736 -0.2795098]
[-0.4037485 0.2564563 -0.2790346 -0.4026485 0.2477951 -0.2778324]]]
*/
float default_bidir_exp_hn_out_final_ts_values[] = {
-0.3660705, 0.0814286, -0.2807783, -0.3648955, 0.0733736, -0.2795098,
-0.4037485, 0.2564563, -0.2790346, -0.4026485, 0.2477951, -0.2778324};
/******************************************************************************
Unity Methods
******************************************************************************/
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/******************************************************************************
Tests
******************************************************************************/
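/******************************************************************************
  Note on the expected outputs (illustrative only)

  As a rough sketch, one common GRU cell formulation -- an assumption for
  intuition only, not necessarily the exact NNPA_GRUACT definition -- is:

    z_t = sigmoid(x_t W_z + b_z + h_{t-1} U_z + b'_z)      (update gate)
    r_t = sigmoid(x_t W_r + b_r + h_{t-1} U_r + b'_r)      (reset gate)
    g_t = tanh(x_t W_h + b_h + r_t o (h_{t-1} U_h + b'_h)) (candidate)
    h_t = (1 - z_t) o g_t + z_t o h_{t-1}

  where W_*/b_* are the input weights/biases, U_*/b'_* are the hidden
  weights/biases defined above, and "o" is elementwise multiplication.
******************************************************************************/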
// Confirm that gru returns OK and expected values when set to return hn
// results from all timesteps
void gru_basic_fwd_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_z_values, default_uni_input_weights_r_values,
default_uni_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_z_values, default_uni_input_biases_r_values,
default_uni_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_z_values, default_uni_hidden_weights_r_values,
default_uni_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_z_values, default_uni_hidden_biases_r_values,
default_uni_hidden_biases_h_values, ZERO_ARRAY,
default_fwd_hn_out_all_ts_shape, ZDNN_4DS,
default_fwd_exp_hn_out_all_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
FWD, ZDNN_OK);
}
// Confirm that gru returns OK and expected values when set to return only
// the final hn result
void gru_basic_fwd_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_z_values, default_uni_input_weights_r_values,
default_uni_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_z_values, default_uni_input_biases_r_values,
default_uni_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_z_values, default_uni_hidden_weights_r_values,
default_uni_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_z_values, default_uni_hidden_biases_r_values,
default_uni_hidden_biases_h_values, ZERO_ARRAY,
default_fwd_hn_out_final_ts_shape, ZDNN_4DS,
default_fwd_exp_hn_out_final_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
FWD, ZDNN_OK);
}
// Confirm that gru returns OK and expected values when set to return hn
// results from all timesteps
void gru_basic_bwd_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_z_values, default_uni_input_weights_r_values,
default_uni_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_z_values, default_uni_input_biases_r_values,
default_uni_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_z_values, default_uni_hidden_weights_r_values,
default_uni_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_z_values, default_uni_hidden_biases_r_values,
default_uni_hidden_biases_h_values, ZERO_ARRAY,
default_bwd_hn_out_all_ts_shape, ZDNN_4DS,
default_bwd_exp_hn_out_all_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
BWD, ZDNN_OK);
}
// Confirm that gru returns OK and expected values when set to return only
// the final hn result
void gru_basic_bwd_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_z_values, default_uni_input_weights_r_values,
default_uni_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_z_values, default_uni_input_biases_r_values,
default_uni_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_z_values, default_uni_hidden_weights_r_values,
default_uni_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_z_values, default_uni_hidden_biases_r_values,
default_uni_hidden_biases_h_values, ZERO_ARRAY,
default_bwd_hn_out_final_ts_shape, ZDNN_4DS,
default_bwd_exp_hn_out_final_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
BWD, ZDNN_OK);
}
// Confirm that gru returns OK and expected values when set to return hn
// results from all timesteps
void gru_basic_bidir_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_input_weights_shape, ZDNN_3DS,
default_bidir_input_weights_z_values,
default_bidir_input_weights_r_values,
default_bidir_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_input_biases_shape, ZDNN_2DS,
default_bidir_input_biases_z_values, default_bidir_input_biases_r_values,
default_bidir_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_hidden_weights_shape, ZDNN_3DS,
default_bidir_hidden_weights_z_values,
default_bidir_hidden_weights_r_values,
default_bidir_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_hidden_biases_shape, ZDNN_2DS,
default_bidir_hidden_biases_z_values,
default_bidir_hidden_biases_r_values,
default_bidir_hidden_biases_h_values, ZERO_ARRAY,
default_bidir_hn_out_all_ts_shape, ZDNN_4DS,
default_bidir_exp_hn_out_all_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
BIDIR, ZDNN_OK);
}
// Confirm that gru returns OK and expected values when set to return only
// the final hn result
void gru_basic_bidir_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_GRUACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
// The test method also supports LSTM which requires c0, pass in h0 again
// as a stand-in for c0 which the test will ignore for GRU networks.
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_input_weights_shape, ZDNN_3DS,
default_bidir_input_weights_z_values,
default_bidir_input_weights_r_values,
default_bidir_input_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_input_biases_shape, ZDNN_2DS,
default_bidir_input_biases_z_values, default_bidir_input_biases_r_values,
default_bidir_input_biases_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_hidden_weights_shape, ZDNN_3DS,
default_bidir_hidden_weights_z_values,
default_bidir_hidden_weights_r_values,
default_bidir_hidden_weights_h_values, ZERO_ARRAY,
// The fourth gate isn't used for GRU so send ZERO_ARRAY
default_bidir_hidden_biases_shape, ZDNN_2DS,
default_bidir_hidden_biases_z_values,
default_bidir_hidden_biases_r_values,
default_bidir_hidden_biases_h_values, ZERO_ARRAY,
default_bidir_hn_out_final_ts_shape, ZDNN_4DS,
default_bidir_exp_hn_out_final_ts_values,
// The test method also supports LSTM which requires cf, pass NULL for GRU
NULL, ZDNN_3DS, NULL,
BIDIR, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
// GRU tests with good input require zAIU to get results and
// validate values.
// FWD direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_fwd_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_fwd_hn_final);
// BWD direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_bwd_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_bwd_hn_final);
// BIDIR direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_bidir_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(gru_basic_bidir_hn_final);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_invsqrt.c 0000664 0000000 0000000 00000025344 15000221702 0020601 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
#include <math.h>
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
tol_bfloat.ulps = MAX_ULPS_BFLOAT;
tol_bfloat.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
// note: api_invsqrt_med_dims (FP16)
// api_invsqrt_med_dims_1 (FP16)
// api_invsqrt_high_dims (FP16)
// api_invsqrt_high_dims_1 (FP16)
// need custom tolerance
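// (0.63 / EPSILON_FP16) + 1 below presumably permits an absolute error of
// roughly 0.63 for those cases, assuming the comparison scales epsilon_mult
// by EPSILON_FP16.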
tol_fp16.ulps = MAX_ULPS_FP16;
tol_fp16.epsilon_mult = (0.63 / EPSILON_FP16) + 1;
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = MAX_EPSILON_MULT_FLOAT;
}
void tearDown(void) {}
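// Scalar reference used to build expected values: 1 / sqrt(x + epsilon).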
float invsqrtf(float x, float e) { return 1.0 / sqrtf(x + e); }
/*
* Simple test to drive a full invsqrt api.
*/
void zdnn_invsqrt_test(uint32_t *io_dims, zdnn_data_layouts layout,
float *input, float epsilon, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_invsqrt(input_ztensor, epsilon, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_invsqrt() returned status %08x but expected %08x\n",
status, expected_status);
// To allow for unique tolerance
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
void api_invsqrt_basic() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
float epsilon = 0;
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[0.577148, 0.182617], [0.408203, 0.129150]],
[[0.353516, 0.111816], [0.577148, 0.316406]]
]]
*/
float expected_values[] = {0.577148, 0.182617, 0.408203, 0.129150,
0.353516, 0.111816, 0.577148, 0.316406};
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_basic_1() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
float epsilon = 0.001;
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[0.577148, 0.182617], [0.408203, 0.129150]],
[[0.353516, 0.111816], [0.577148, 0.316406]]
]]
*/
float expected_values[] = {0.577148, 0.182617, 0.408203, 0.129150,
0.353516, 0.111816, 0.577148, 0.316406};
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
// test to drive input tensors with 280 values in their buffer.
void api_invsqrt_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float epsilon = 0;
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i++) {
expected_values[i] = invsqrtf(input_values[i], epsilon);
}
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_med_dims_1() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float epsilon = 0.001;
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i++) {
expected_values[i] = invsqrtf(input_values[i], epsilon);
}
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
// test to drive an input tensor with 6435 values in its buffer
void api_invsqrt_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float epsilon = 0;
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i++) {
expected_values[i] = invsqrtf(input_values[i], epsilon);
}
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_high_dims_1() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float epsilon = 0.001;
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i++) {
expected_values[i] = invsqrtf(input_values[i], epsilon);
}
zdnn_invsqrt_test(shape, ZDNN_NHWC, input_values, epsilon, ZDNN_OK,
expected_values);
}
/*
* Simple test to drive a full invsqrt api using the data type and a 3D layout
*/
void api_invsqrt_3D() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
float epsilon = 0;
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[0.577148, 0.182617], [0.408203, 0.129150]],
[[0.353516, 0.111816], [0.333496, 0.105469]]
]]
*/
float expected_values[] = {0.577148, 0.182617, 0.408203, 0.129150,
0.353516, 0.111816, 0.333496, 0.105469};
zdnn_invsqrt_test(shape, ZDNN_3D, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_3D_1() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
float epsilon = 0.001;
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[0.577148, 0.182617], [0.408203, 0.129150]],
[[0.353516, 0.111816], [0.333496, 0.105469]]
]]
*/
float expected_values[] = {0.577148, 0.182617, 0.408203, 0.129150,
0.353516, 0.111816, 0.333496, 0.105469};
zdnn_invsqrt_test(shape, ZDNN_3D, input_values, epsilon, ZDNN_OK,
expected_values);
}
/*
* Simple test to drive a full invsqrt api using the data type and a 2D layout
*/
void api_invsqrt_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 6]]
]]
*/
float input_values[] = {1, 10, 2, 6};
float epsilon = 0;
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[1, 0.316406], [0.707031, 0.408203]]
]]
*/
float expected_values[] = {1, 0.316406, 0.707031, 0.408203};
zdnn_invsqrt_test(shape, ZDNN_2D, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_2D_1() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 6]]
]]
*/
float input_values[] = {1, 10, 2, 6};
float epsilon = 0.001;
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[1, 0.316406], [0.707031, 0.408203]]
]]
*/
float expected_values[] = {1, 0.316406, 0.707031, 0.408203};
zdnn_invsqrt_test(shape, ZDNN_2D, input_values, epsilon, ZDNN_OK,
expected_values);
}
/*
* Simple test to drive a full invsqrt api using the data type and a 1D layout
*/
void api_invsqrt_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[6, 7]]
]]
*/
float input_values[] = {6, 7};
float epsilon = 0;
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[0.408203, 0.377930]]
]]
*/
float expected_values[] = {0.408203, 0.377930};
zdnn_invsqrt_test(shape, ZDNN_1D, input_values, epsilon, ZDNN_OK,
expected_values);
}
void api_invsqrt_1D_1() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[6, 7]]
]]
*/
float input_values[] = {6, 7};
float epsilon = 0.001;
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[0.408203, 0.377930]]
]]
*/
float expected_values[] = {0.408203, 0.377930};
zdnn_invsqrt_test(shape, ZDNN_1D, input_values, epsilon, ZDNN_OK,
expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_basic_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_med_dims_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_high_dims_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_3D_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_2D_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_1D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_invsqrt_1D_1);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_layernorm.c 0000664 0000000 0000000 00000013515 15000221702 0021100 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/**
* zdnn_layernorm_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_layernorm_test(uint32_t *i_dims, uint32_t *bc_dims, uint32_t *o_dims,
zdnn_data_layouts layout, float *input_a,
float *input_b, float *input_c, const float beta_value,
const float gamma_value, const float epsilon_value,
zdnn_status expected_status, float *expected_values) {
/*
* Input Tensor a
*/
zdnn_ztensor *input_ztensor_a = alloc_ztensor_with_values(
i_dims, layout, test_datatype, NO_CONCAT, false, input_a);
/*
* Input Tensor b
*/
zdnn_ztensor *input_ztensor_b = alloc_ztensor_with_values(
bc_dims, layout, test_datatype, NO_CONCAT, false, input_b);
/*
* Input Tensor c
*/
zdnn_ztensor *input_ztensor_c = alloc_ztensor_with_values(
bc_dims, layout, test_datatype, NO_CONCAT, false, input_c);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
o_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status =
zdnn_layernorm(input_ztensor_a, input_ztensor_b, input_ztensor_c,
beta_value, gamma_value, epsilon_value, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_layernorm() to returned status %08x but expected %08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_ztensor_a, input_ztensor_b, input_ztensor_c,
output_ztensor);
}
// Calculate values to approximate zDNN LayerNorm
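// out[i] = (input[i] - mean) / sqrt(variance + epsilon) * gamma + beta,
// where sqrt's argument is floored at 1e-2 if variance + epsilon is
// non-positive (matching the code below).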
void generate_layernorm_output(const float input_values[], const float mean[],
const float variance[], const float beta,
const float gamma, const float epsilon,
int num_values, float expected_values[]) {
float sum = variance[0] + epsilon;
sum = (sum <= 0.0f) ? 1e-2f : sum;
float invsqrt_val = 1.0 / sqrtf(sum);
for (int i = 0; i < num_values; i++) {
expected_values[i] =
(input_values[i] - mean[0]) * invsqrt_val * gamma + beta;
}
}
void zdnn_layernorm_basic_small_nhwc() {
uint32_t shape_i[] = {1, 1, 2, 5};
uint32_t shape_bc[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 2, 5};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[] = {0.10, 0.15, 0.20, 0.25, 0.30,
0.35, 0.40, 0.45, 0.50, 0.55};
float mean[] = {0.325};
float variance[] = {0.45};
const float beta = 0.089;
const float gamma = 0.67;
const float epsilon = 0.0001;
float expected_values[num_io_buffer_values];
generate_layernorm_output(input_values, mean, variance, beta, gamma, epsilon,
num_io_buffer_values, expected_values);
zdnn_layernorm_test(shape_i, shape_bc, shape_o, ZDNN_NHWC, input_values, mean,
variance, beta, gamma, epsilon, ZDNN_OK, expected_values);
}
void zdnn_layernorm_basic_large_nhwc_pos_neg() {
uint32_t shape_i[] = {1, 1, 40, 80};
uint32_t shape_bc[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 40, 80};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float mean[] = {0.729};
float variance[] = {0.25};
const float beta = 0.089;
const float gamma = 0.67;
const float epsilon = 0.0001;
float expected_values[num_io_buffer_values];
generate_layernorm_output(input_values, mean, variance, beta, gamma, epsilon,
num_io_buffer_values, expected_values);
zdnn_layernorm_test(shape_i, shape_bc, shape_o, ZDNN_NHWC, input_values, mean,
variance, beta, gamma, epsilon, ZDNN_OK, expected_values);
}
void zdnn_layernorm_basic_large_nhwc_neg() {
uint32_t shape_i[] = {1, 1, 50, 20};
uint32_t shape_bc[] = {1, 1, 1, 1};
uint32_t shape_o[] = {1, 1, 50, 20};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float mean[] = {0.2};
float variance[] = {0.25};
const float beta = 0.089;
const float gamma = 0.67;
const float epsilon = 0.0001;
float expected_values[num_io_buffer_values];
generate_layernorm_output(input_values, mean, variance, beta, gamma, epsilon,
num_io_buffer_values, expected_values);
zdnn_layernorm_test(shape_i, shape_bc, shape_o, ZDNN_NHWC, input_values, mean,
variance, beta, gamma, epsilon, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_layernorm_basic_small_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_layernorm_basic_large_nhwc_pos_neg);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_layernorm_basic_large_nhwc_neg);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_leaky_relu.c 0000664 0000000 0000000 00000043624 15000221702 0021230 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
// -----------------------------------------------------------------------------
// Leaky ReLU unit testing; for convenience, recall the following:
// leaky_relu(x, a) -> if (x > 0) return x; else return x * a;
// -----------------------------------------------------------------------------
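// A minimal scalar sketch of the definition recalled above (a hypothetical
// helper for illustration only; it is not used by the tests below and ignores
// the optional clipping_value that zdnn_leaky_relu() accepts):
static inline float leaky_relu_ref(float x, float a) {
  return (x > 0.0f) ? x : x * a;
}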
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/**
* zdnn_leaky_relu_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_leaky_relu_test(uint32_t *io_dims, zdnn_data_layouts layout,
float *input, const void *clipping_value,
float adjustment_factor, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_leaky_relu(input_ztensor, clipping_value,
adjustment_factor, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_leaky_relu() returned status %08x but expected %08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_basic_nhwc_basic
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*/
void zdnn_leaky_relu_basic_nhwc_basic() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_expected_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float clip_value = 0;
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, &clip_value,
adj_factor, ZDNN_OK, input_expected_values);
}
void zdnn_leaky_relu_basic_nhwc_basic_adj() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_expected_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float clip_value = 0;
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, &clip_value,
adj_factor, ZDNN_OK, input_expected_values);
}
/**
* zdnn_leaky_relu_basic_nhwc_basic_clip6
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[6], [6], [6]]
* ]]
*/
void zdnn_leaky_relu_basic_nhwc_basic_clip6() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_expected_values[] = {1, 2, 3, 4, 5, 6, 6, 6, 6};
float clip_value = 6;
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, &clip_value,
adj_factor, ZDNN_OK, input_expected_values);
}
void zdnn_leaky_relu_basic_nhwc_basic_clip6_adj() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_expected_values[] = {1, 2, 3, 4, 5, 6, 6, 6, 6};
float clip_value = 6;
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, &clip_value,
adj_factor, ZDNN_OK, input_expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Basic
Layout: ZDNN_3D
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_deadneuron_3d_basic
*
* Simple test of all negative input values
* Expect a dead neuron
*
* Input values as NWC sized (3,3,2):
* [[
* [[-1, -10], [-2, -20], [-3, -30]],
* [[-4, -40], [-5, -50], [-6, -60]],
* [[-7, -70], [-8, -80], [-9, -90]]
* ]]
*
* Expected Output values as NWC sized (3,3,2):
* [[
* [[0, 0], [0, 0], [0, 0]],
* [[0, 0], [0, 0], [0, 0]],
* [[0, 0], [0, 0], [0, 0]]
* ]]
*/
void zdnn_leaky_relu_deadneuron_3d_basic() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {-1, -10, -2, -20, -3, -30, -4, -40, -5,
-50, -6, -60, -7, -70, -8, -80, -9, -90};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0};
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_3D, input_values, NULL, adj_factor, ZDNN_OK,
expected_values);
}
void zdnn_leaky_relu_deadneuron_3d_basic_adj() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {-1, -10, -2, -20, -3, -30, -4, -40, -5,
-50, -6, -60, -7, -70, -8, -80, -9, -90};
float expected_values[] = {-.1, -1.0, -.2, -2.0, -.3, -3.0,
-.4, -4.0, -.5, -5.0, -.6, -6.0,
-.7, -7.0, -.8, -8.0, -.9, -9.0};
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_3D, input_values, NULL, adj_factor, ZDNN_OK,
expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_balance_nhwc_basic
*
* Simple test of half positive and half negative input values
* Expect 50% zeroed and 50% valued
*
* Input values as NHWC
* [[
* [[10, -10], [20, -20], [30, -30]],
* [[40, -40], [50, -50], [60, -60]],
* [[70, -70], [80, -80], [90, -90]],
* ]]
*
* Expected Output values as NHWC
* [[
* [[10, 0], [20, 0], [30, 0]],
* [[40, 0], [50, 0], [60, 0]],
* [[70, 0], [80, 0], [90, 0]],
* ]]
*/
void zdnn_leaky_relu_balance_nhwc_basic() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, 0, 20, 0, 30, 0, 40, 0, 50,
0, 60, 0, 70, 0, 80, 0, 90, 0};
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, NULL, adj_factor,
ZDNN_OK, expected_values);
}
void zdnn_leaky_relu_balance_nhwc_basic_adj() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, -1.0, 20, -2.0, 30, -3.0, 40, -4.0, 50, -5.0,
60, -6.0, 70, -7.0, 80, -8.0, 90, -9.0};
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, NULL, adj_factor,
ZDNN_OK, expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_balance_nhwc_basic_clip50
*
* Simple test of half positive and half negative input values
* with a clipping value of 50
* Expect 50% zeroed and 50% valued, with positive values clipped at 50
*
* Input values as NHWC
* [[
* [[10, -10], [20, -20], [30, -30]],
* [[40, -40], [50, -50], [60, -60]],
* [[70, -70], [80, -80], [90, -90]],
* ]]
*
* Expected Output values as NHWC
* [[
* [[10, 0], [20, 0], [30, 0]],
* [[40, 0], [50, 0], [50, 0]],
* [[50, 0], [50, 0], [50, 0]],
* ]]
*/
void zdnn_leaky_relu_balance_nhwc_basic_clip50() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, 0, 20, 0, 30, 0, 40, 0, 50,
0, 50, 0, 50, 0, 50, 0, 50, 0};
float clip_value = 50;
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, &clip_value, adj_factor,
ZDNN_OK, expected_values);
}
void zdnn_leaky_relu_balance_nhwc_basic_clip50_adj() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, -1.0, 20, -2.0, 30, -3.0, 40, -4.0, 50, -5.0,
50, -6.0, 50, -7.0, 50, -8.0, 50, -9.0};
float clip_value = 50;
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, &clip_value, adj_factor,
ZDNN_OK, expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Large
Layout: NHWC
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_basic_nhwc_large
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[65000, 65100, 65200], [64000, 64100, 64200], [63000, 63100, 63200]],
* [[62000, 62100, 62200], [61000, 61100, 61200], [60000, 60100, 60200]],
* [[59000, 59100, 59200], [58000, 58100, 58200], [57000, 57100, 57200]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[65000, 65100, 65200], [64000, 64100, 64200], [63000, 63100, 63200]],
* [[62000, 62100, 62200], [61000, 61100, 61200], [60000, 60100, 60200]],
* [[59000, 59100, 59200], [58000, 58100, 58200], [57000, 57100, 57200]]
* ]]
*
*/
void zdnn_leaky_relu_basic_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
float input_expected_values[] = {
65000, 65100, 65200, 64000, 64100, 64200, 63000, 63100, 63200,
62000, 62100, 62200, 61000, 61100, 61200, 60000, 60100, 60200,
59000, 59100, 59200, 58000, 58100, 58200, 57000, 57100, 57200};
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, NULL,
adj_factor, ZDNN_OK, input_expected_values);
}
void zdnn_leaky_relu_basic_nhwc_large_adj() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
float input_expected_values[] = {
65000, 65100, 65200, 64000, 64100, 64200, 63000, 63100, 63200,
62000, 62100, 62200, 61000, 61100, 61200, 60000, 60100, 60200,
59000, 59100, 59200, 58000, 58100, 58200, 57000, 57100, 57200};
float adj_factor = 0.1;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_expected_values, NULL,
adj_factor, ZDNN_OK, input_expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Large
Layout: ZDNN_3D
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_deadneuron_3d_large
*
* Simple test of all negative input values
* Expect a dead neuron
*
* Generate a test that is of size 8x8x8
* and use the automatic float generator to create
* input values.
*
* Output will contain a tensor of size 8x8x8
* with all zeros.
*/
void zdnn_leaky_relu_deadneuron_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {8, 8, 8}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
gen_float_array_zeros(num_io_buffer_values, expected_values);
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_3D, input_values, NULL, adj_factor, ZDNN_OK,
expected_values);
}
void zdnn_leaky_relu_deadneuron_3d_large_adj() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {8, 8, 8}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float adj_factor = 0.1;
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i++) {
expected_values[i] = input_values[i] * adj_factor;
}
zdnn_leaky_relu_test(shape, ZDNN_3D, input_values, NULL, adj_factor, ZDNN_OK,
expected_values);
}
/*
------------------------------------------------------------------------------
ReLU Large
Layout: NHWC
------------------------------------------------------------------------------
*/
/**
* zdnn_leaky_relu_balance_nhwc_large
*
* Simple test of half positive and half negative input values
* Expect 50% zeroed and 50% valued
*
* Generate a test that is of size 50x25x10x1
* and use the automatic float generator to create
* input values.
*
* Output will contain a tensor of size 50x25x10x1
* with 50% zeros and 50% valued.
*
*
*/
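/*
 * Illustrative note: the expected buffer below is built by copying the input
 * and zeroing every other element (or, in the _adj variant, scaling every
 * other element by adj_factor). This assumes gen_random_float_array_pos_neg
 * alternates positive and negative values, so that exactly the negative half
 * of the entries is affected.
 */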
void zdnn_leaky_relu_balance_nhwc_large() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 10, 25, 50}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
copy_to_array(num_io_buffer_values, input_values, expected_values);
fill_everyother_with_zero_float_array(num_io_buffer_values, expected_values);
float adj_factor = 0;
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, NULL, adj_factor,
ZDNN_OK, expected_values);
}
void zdnn_leaky_relu_balance_nhwc_large_adj() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 10, 25, 50}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float adj_factor = 0.1;
float expected_values[num_io_buffer_values];
for (int i = 0; i < num_io_buffer_values; i += 2) {
expected_values[i] = input_values[i];
expected_values[i + 1] = input_values[i + 1] * adj_factor;
}
zdnn_leaky_relu_test(shape, ZDNN_NHWC, input_values, NULL, adj_factor,
ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_basic_nhwc_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_basic_nhwc_basic_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_basic_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_basic_nhwc_large_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_deadneuron_3d_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_deadneuron_3d_basic_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_balance_nhwc_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_balance_nhwc_basic_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_deadneuron_3d_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_deadneuron_3d_large_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_balance_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_balance_nhwc_large_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_leaky_relu_basic_nhwc_basic_clip6);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_leaky_relu_basic_nhwc_basic_clip6_adj);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_leaky_relu_balance_nhwc_basic_clip50);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_leaky_relu_balance_nhwc_basic_clip50_adj);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_log_elwise.c 0000664 0000000 0000000 00000010005 15000221702 0021210 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full log api.
*/
void api_log_basic() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1.09861228, 3.40119738], [1.79175946, 4.09434456]],
[[2.07944154, 4.38202663], [1.09861228, 2.30258509]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_LOG, ZDNN_OK);
}
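/*
 * Illustrative note: the expected values quoted in the comments of these log
 * tests are simply the natural log of each input, e.g. ln(3) = 1.09861229 and
 * ln(30) = 3.40119738, matching the (1,2,2,2) expected block above.
 */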
// test to drive input tensors with 280 values in their buffer.
void api_log_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_LOG, ZDNN_OK);
}
// test to drive an input tensor with 6435 values in its buffer
void api_log_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
test_elwise_api_1_input(shape, ZDNN_NHWC, input_values, NNPA_LOG, ZDNN_OK);
}
/*
 * Simple test to drive a full log api using the test data type
 * and a 3D layout
*/
void api_log_3D() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1.09861228, 3.40119738], [1.79175946, 4.09434456]],
[[2.07944154, 4.38202663], [2.19722457, 4.49980967]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_3D, input_values, NNPA_LOG, ZDNN_OK);
}
/*
 * Simple test to drive a full log api using the test data type
 * and 2 dimensional tensors
*/
void api_log_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 6]]
]]
*/
float input_values[] = {1, 10, 2, 6};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[0, 2.30258509], [0.69314718, 1.79175946]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_2D, input_values, NNPA_LOG, ZDNN_OK);
}
/*
 * Simple test to drive a full log api using the test data type
 * and 1 dimensional tensors
*/
void api_log_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
  /* Input 1 values as true NHWC sized (1,1,1,2)
[[
[[6, 7]]
]]
*/
float input_values[] = {6, 7};
  /* Expected values as true NHWC sized (1,1,1,2)
[[
[[1.79175946, 1.94591014]]
]]
*/
test_elwise_api_1_input(shape, ZDNN_1D, input_values, NNPA_LOG, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_log_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_lstm_dual_layers.c 0000664 0000000 0000000 00000176307 15000221702 0022444 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
zdnn_ztensor *
test_layer(zdnn_ztensor *input, uint32_t *h0_shape, void *h0_values,
uint32_t *c0_shape, void *c0_values, uint32_t *weights_shape,
void *weights_values, uint32_t *biases_shape, void *biases_values,
uint32_t *hidden_weights_shape, void *hidden_weights_values,
uint32_t *hidden_biases_shape, void *hidden_biases_values,
uint32_t *all_ts_out_shape, void *all_ts_out_exp_values,
uint32_t *cell_out_shape, void *cell_out_exp_values,
bool is_prev_layer_bidir, bool is_this_layer_bidir) {
zdnn_ztensor *h0, *c0, *weights, *biases, *hidden_weights, *hidden_biases,
*all_ts_out, *cell_out;
h0 = alloc_ztensor_with_values(h0_shape, ZDNN_3DS, test_datatype, NO_CONCAT,
false, (float *)h0_values);
c0 = alloc_ztensor_with_values(c0_shape, ZDNN_3DS, test_datatype, NO_CONCAT,
false, (float *)c0_values);
  // FICO/ZRH gate values arrive as a single contiguous buffer rather than as
  // four (LSTM) or three (GRU) separate pointers, so slice them by offset below
uint32_t num_elements_weights =
weights_shape[0] * weights_shape[1] * weights_shape[2];
weights = alloc_ztensor_with_values(
weights_shape, ZDNN_3DS, test_datatype,
RNN_TYPE_LSTM |
(is_prev_layer_bidir ? PREV_LAYER_BIDIR : PREV_LAYER_UNI) |
USAGE_WEIGHTS,
false, (float *)weights_values,
(float *)weights_values + num_elements_weights,
(float *)weights_values + 2 * num_elements_weights,
(float *)weights_values + 3 * num_elements_weights);
uint32_t num_elements_biases = biases_shape[0] * biases_shape[1];
biases = alloc_ztensor_with_values(
biases_shape, ZDNN_2DS, test_datatype, RNN_TYPE_LSTM | USAGE_BIASES,
false, (float *)biases_values,
(float *)biases_values + num_elements_biases,
(float *)biases_values + 2 * num_elements_biases,
(float *)biases_values + 3 * num_elements_biases);
uint32_t num_elements_hidden_weights = hidden_weights_shape[0] *
hidden_weights_shape[1] *
hidden_weights_shape[2];
hidden_weights = alloc_ztensor_with_values(
hidden_weights_shape, ZDNN_3DS, test_datatype,
RNN_TYPE_LSTM | USAGE_HIDDEN_WEIGHTS, false,
(float *)hidden_weights_values,
(float *)hidden_weights_values + num_elements_hidden_weights,
(float *)hidden_weights_values + 2 * num_elements_hidden_weights,
(float *)hidden_weights_values + 3 * num_elements_hidden_weights);
uint32_t num_elements_hidden_biases =
hidden_biases_shape[0] * hidden_biases_shape[1];
hidden_biases = alloc_ztensor_with_values(
hidden_biases_shape, ZDNN_2DS, test_datatype,
RNN_TYPE_LSTM | USAGE_HIDDEN_BIASES, false, (float *)hidden_biases_values,
(float *)hidden_biases_values + num_elements_hidden_biases,
(float *)hidden_biases_values + 2 * num_elements_hidden_biases,
(float *)hidden_biases_values + 3 * num_elements_hidden_biases);
all_ts_out = alloc_ztensor_with_values(
all_ts_out_shape, ZDNN_4DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
cell_out = alloc_ztensor_with_values(cell_out_shape, ZDNN_4DS, test_datatype,
NO_CONCAT, true, ZERO_ARRAY);
zdnn_status status =
zdnn_lstm(input, h0, c0, weights, biases, hidden_weights, hidden_biases,
is_this_layer_bidir ? BIDIR : FWD, NULL, all_ts_out, cell_out);
if (status != ZDNN_OK) {
TEST_FAIL_MESSAGE_FORMATTED("%s() - zdnn_lstm() not ZDNN_OK, status = %08x",
__func__, status);
}
assert_ztensor_values(all_ts_out, false, all_ts_out_exp_values);
assert_ztensor_values(cell_out, false, cell_out_exp_values);
free_ztensor_buffers(7, h0, c0, weights, biases, hidden_weights,
hidden_biases, cell_out);
return all_ts_out;
}
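/*
 * Illustrative notes on the tests below (descriptive only):
 *
 * - The weight/bias buffers for the four LSTM gates (F, I, C, O) are supplied
 *   as one contiguous array and sliced by pointer arithmetic in test_layer().
 *   For example, with weights_shape = {1, 4, 4} there are 1 * 4 * 4 = 16
 *   floats per gate, so the gate slices start at float offsets 0, 16, 32 and
 *   48 of weights_values.
 *
 * - The value arrays are declared as uint32_t but hold IEEE-754 single
 *   precision bit patterns and are passed to alloc_ztensor_with_values() as
 *   float data; e.g. 0x3f800000 is 1.0f, so 0x3f80f554 is a value slightly
 *   above 1.0.
 */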
void lstm_fwd_to_fwd() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {false, false};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {1, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4};
uint32_t c00_shape[] = {1, 2, 4};
uint32_t c00_values[] = {0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0};
uint32_t weights0_shape[] = {1, 4, 4};
uint32_t weights0_values[] = {
0x3e493898, 0x3dcbca78, 0xbeee948c, 0x3dbfaa08, 0x3ed41bd0, 0xbede9cf8,
0x3ee3743e, 0xbdac80c8, 0x3edec5e8, 0x3d3c6690, 0x3ec2a6f0, 0xbda882b8,
0x3ee1e222, 0xbea027ac, 0xbeff5dfe, 0xbe6a5f1c, 0x3dbed0f8, 0x3e67aa8c,
0x3e8c896e, 0x3e9ed100, 0xbec67a6c, 0x3e4de7f8, 0xbd813f20, 0x3ef5cf48,
0xbeb16e18, 0xbe97a46a, 0x3c9c6440, 0xbec54796, 0xbe843ed2, 0x3e1aadc8,
0x3ded4400, 0xbe3ba1ec, 0x3e44e48c, 0x3eb7435c, 0x3e7fa638, 0x3ef0d4f2,
0xbe97a134, 0x3e3f7148, 0x3dd65318, 0x3eac7f54, 0x3e3bb1dc, 0xbefd5f4a,
0xbec7b396, 0xbe5f3eb0, 0x3e817616, 0xbea61100, 0xbe9368e8, 0xbe00dcd4,
0xbef3dd78, 0xbce1b020, 0xbe9bc938, 0xbdfedb88, 0xbc133e80, 0x3d99bfa0,
0x3ee84968, 0x3cb8d280, 0xbec0c878, 0xbe51adf4, 0x3eaf6fd0, 0x3d956718,
0xbec577a2, 0x3e97e798, 0xbed7e164, 0x3df2ddd0};
uint32_t biases0_shape[] = {1, 4};
uint32_t biases0_values[] = {0xbed2f700, 0x3e8ab64c, 0x3ecb4226, 0x3eaf295c,
0x3e7604cc, 0x3e34d140, 0xbdf63f00, 0xbdd75a50,
0xbeb493ac, 0x3cb6ae60, 0xbeb47690, 0xbe8cec88,
0xbe952c30, 0x3e2ef934, 0xbe988dc4, 0xbc32ba00};
uint32_t hidden_weights0_shape[] = {1, 4, 4};
uint32_t hidden_weights0_values[] = {
0x3c63ac80, 0x3ef27eba, 0xbee5f866, 0xbe7e7cdc, 0xbd30adc0, 0x3ea29306,
0xbe72ba40, 0xbec42d02, 0x3dcf3d10, 0x3ef30cc4, 0x3eae4fce, 0xbeef9400,
0x3ea2c660, 0xbd141d60, 0xbb546b00, 0xbed810ec, 0x3e48ec5c, 0xbef59156,
0xbe331d5c, 0xbea6c676, 0x3cf559e0, 0xbe97bba0, 0xbed1d2ba, 0xbcd23440,
0xbe79d1cc, 0xbe002b3c, 0xbdd9d200, 0x3eb74200, 0x3e7245d4, 0xbe7966ec,
0x3ddae2f8, 0xbd5288f0, 0xbdcfb470, 0x3e9fb02e, 0xbdc53cf0, 0xbda03c28,
0x3e8c6456, 0xbec8528a, 0xbdc90e10, 0x3bd3e180, 0x3e8a6774, 0xbdd87bf0,
0xbee5b8ba, 0xbe6896b8, 0xbef6e502, 0xbe4f9a1c, 0xbedd0a44, 0x3e40deb8,
0xbee66a3a, 0x3ee72b36, 0xbd6c53f0, 0x3d5bc2b0, 0xbd0a36c0, 0x3e396c38,
0xbe648f70, 0xbdd664c0, 0x3ee121a2, 0xbee707ae, 0x3eccb614, 0x3eb6d016,
0xbe50d738, 0x3ea1f874, 0xbecedf54, 0x3e0eec08};
uint32_t hidden_biases0_shape[] = {1, 4};
uint32_t hidden_biases0_values[] = {
0xbe94a63c, 0x3eb32ed6, 0xbe380bcc, 0x3ed7eee0, 0x3cac4fa0, 0x3ea50604,
0xbec183fa, 0xbeafbf44, 0x3e3924a0, 0x3d81aa40, 0xbb73ed00, 0xbdca6d08,
0x3d807a40, 0xbde9d330, 0xbb663e00, 0x3d82a7c0};
uint32_t all_ts_out0_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0x3d7dec3b, 0x3ec321c6, 0xbd33532c, 0xbd2813ae, 0x3d517efa, 0x3ee7cc40,
0xbd18113a, 0x3e9092b4, 0x3df21ddc, 0xbb310d91, 0xbe18e945, 0xbce56e8b,
0x3d359756, 0x3e238b20, 0xbe0c1333, 0x3e987c64, 0x3dd11579, 0xbe65fb8f,
0xbe55c118, 0xbda5413b, 0x3d7d912d, 0xbcbef32e, 0xbe3ba9c3, 0x3e30db74,
0x3dc41819, 0xbd0f17f6, 0xbdbe76a3, 0x3beeb838, 0x3cc76662, 0x3d477816,
0xbe6299de, 0x3e62029a, 0x3d3e7a2d, 0xbe47d763, 0xbe4eef76, 0x3d90d525,
0x3cfd8dfe, 0x3de03933, 0xbdb0d31e, 0x3e51f80d};
uint32_t cell_out0_shape[] = {1, 1, 2, 4};
uint32_t cell_out0_values[] = {0x3f3626d4, 0xbeab179c, 0xbf0f2c94,
0x3e03c918, 0x3e366377, 0x3e49c51e,
0xbea2454d, 0x3eeb4eda};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 =
test_layer(input0, h00_shape, (void *)h00_values, c00_shape,
(void *)c00_values, weights0_shape, (void *)weights0_values,
biases0_shape, (void *)biases0_values, hidden_weights0_shape,
(void *)hidden_weights0_values, hidden_biases0_shape,
(void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, cell_out0_shape,
(void *)cell_out0_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {1, 2, 5};
uint32_t h01_values[] = {0x3fe41f4c, 0x3fc316bd, 0x3e4520a0, 0x3fe7e0c3,
0x3f8930f2, 0x3c305000, 0x3f2385f8, 0x3f78c07a,
0x3feeed13, 0x3f012eea};
uint32_t c01_shape[] = {1, 2, 5};
uint32_t c01_values[] = {0x3fdb2c04, 0x3fa455aa, 0x3faaf233, 0x3f92f487,
0x3f7d3326, 0x3e3365a8, 0x3f600a90, 0x3dd59f00,
0x3ec6cda0, 0x3fd0ec63};
uint32_t weights1_shape[] = {1, 4, 5};
uint32_t weights1_values[] = {
0xbee2f252, 0xbe83e971, 0x3e261b9c, 0x3ebc395c, 0xbed87cba, 0x3e4eb6e0,
0x3e83a64e, 0xbe18ef8e, 0x3df90638, 0x3dc5e080, 0xbe5bef69, 0x3e72843c,
0xbdfff1d8, 0xbe58ace5, 0xbe807ef0, 0xbe98cd9b, 0x3eafbdf8, 0x3e074a8c,
0xbe574539, 0xbecc25a9, 0x3e4d6418, 0x3e1735dc, 0x3e6bc304, 0x3bc9f900,
0x3ebd57a4, 0xbea1dc41, 0x3eabc840, 0xbedd7037, 0xbd9bb79c, 0xbe05dde4,
0xbe4216de, 0xbe136b9a, 0x3ea58b16, 0xbe71302b, 0xbe87f0ac, 0x3e06b148,
0x3a3bae00, 0xbebd57dc, 0x3d721bf0, 0xbe295dd2, 0x3ec8c806, 0x3c696e40,
0x3d952498, 0xbdd1716c, 0x3e10c984, 0xbe94b2a7, 0xbe9a126e, 0x3ebdf640,
0xbca2ec60, 0xbc172600, 0xbd506310, 0xbeb9158a, 0xbe1985b8, 0xbe8b7474,
0xbdcdfc84, 0x3e0332e4, 0x3eb682ba, 0x3e06e404, 0x3e1ae0c4, 0xbe8d2560,
0x3e9e5868, 0xbe5cf1d4, 0x3db3dd28, 0x3e354500, 0x3e1f0c64, 0xbeaa740b,
0x3da48928, 0xbeaa02a3, 0xbd669538, 0xbe271822, 0x3e3b9ae0, 0xbe6de235,
0x3d987eb0, 0xbebbb5a4, 0x3e2dd3f4, 0xbe19fc78, 0x3dd306b8, 0x3e430d88,
0xbd8d3050, 0x3e987cda};
uint32_t biases1_shape[] = {1, 5};
uint32_t biases1_values[] = {0xbe8d6097, 0x3cbbc620, 0x3e5b9460, 0x3e6328f4,
0xbed14480, 0xbdf89d64, 0xbdb0b3c4, 0x3d2f4d98,
0x3ed3c2e8, 0x3cc42620, 0xbda3e468, 0xbeaa2909,
0xbe436636, 0x3e24fec4, 0xbea299d4, 0x3e2a3b28,
0x3ec258fa, 0x3cf049b0, 0xbe989ba1, 0xbe24134e};
uint32_t hidden_weights1_shape[] = {1, 5, 5};
uint32_t hidden_weights1_values[] = {
0xbe20b66c, 0x3e5d42c8, 0x3eb2e8ec, 0xbe1b9f76, 0xbee1e40d, 0x3ebbff92,
0x3e79a49c, 0xbda4ce70, 0x3e5f481c, 0xbeb7e0dd, 0x3e804fe0, 0xbe8f83dc,
0x3e3248cc, 0xbe9fee66, 0x3eb4c482, 0xbe89ca96, 0x3e036284, 0x3da2aec0,
0x3dcedbf0, 0x3e77c3f8, 0x3ecdd9da, 0xbe2089f6, 0x3e42d780, 0xbe9aebe8,
0x3ed6d390, 0xbe6f1e1b, 0x3d64ba10, 0x3e86944e, 0xbec4e626, 0x3eace9cc,
0x3b9084c0, 0xbeb401c3, 0x3d757710, 0x3ee46d12, 0x3ee4e29a, 0x3e3d5c4c,
0x3d0ff4f8, 0x3e55de1c, 0x3e915990, 0x3ec83690, 0x3d0f9250, 0x3e9e8ea0,
0xbe5ec821, 0xbe9e462c, 0x3eb165e2, 0x3d190310, 0x3ece54c0, 0xbebdbf60,
0x3e332b14, 0xbdd1fa20, 0x3eb76f78, 0x3e9eff90, 0x3ebc5350, 0xbdea86c4,
0x3e6d851c, 0xbecc7bce, 0xbead1b0c, 0x3ebb7968, 0x3e497f5c, 0x3e8e5746,
0xbe9c4230, 0xbe5f6ed0, 0x3ea3e864, 0x3ecbffd4, 0xbe20471c, 0xbd93fe10,
0xbedd358e, 0x3eb0cbec, 0x3e177f54, 0x3e5bbc44, 0xbe94b3e4, 0xbe81ffa5,
0x3ecb999a, 0x3ee4e636, 0x3d527bf8, 0xbddabb30, 0x3ea2c8c8, 0x3d082a00,
0x3edb2580, 0xbd8f889c, 0xbe811315, 0xbd507b08, 0xbe58367b, 0x3eade05a,
0x3ec26bea, 0xbe807b12, 0xbe8480f1, 0x3ed26ffe, 0xbe26eada, 0x3c976030,
0xbeb030cc, 0x3eb0f98a, 0x3e3b45e4, 0x3e80b7ea, 0xbea1ef22, 0x3e99b77e,
0x3e926d0a, 0xbeadd2f6, 0xbe8219a4, 0xbe190f96};
uint32_t hidden_biases1_shape[] = {1, 5};
uint32_t hidden_biases1_values[] = {
0xbede7c01, 0xbdb9cd1c, 0x3e99f81e, 0xbed8b7ed, 0x3ebe51d8,
0x3dc7ff90, 0xbeae8cee, 0x3e63833c, 0xbecfe0c1, 0xbedc1c4e,
0xbe37306c, 0x3e062014, 0x3ca711d0, 0xbece783b, 0x3ebde4ee,
0x3e769414, 0x3ee39938, 0x3e675c3c, 0xbe972362, 0x3ebf18f2};
uint32_t all_ts_out1_shape[] = {5, 1, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0x3e10591e, 0x3ea4c525, 0x3ede5521, 0x3ee29046, 0x3ebfbb06, 0xbdf93f6b,
0xbd444231, 0x3e873334, 0x3e5763af, 0x3ecbdbf2, 0xbd65671e, 0xbd55c4a2,
0x3efdcab4, 0x3e2c772b, 0x3eac09f7, 0xbe4fcd44, 0xbe0e3377, 0x3e535bcd,
0x3dbbe197, 0x3ead8e9b, 0xbe1ad9da, 0xbdfa7c72, 0x3ee06835, 0x3dbbcd47,
0x3e6c8300, 0xbe44c5f7, 0xbe0ce1ad, 0x3e0a1251, 0x3d4e3a71, 0x3e54c2a1,
0xbe35533e, 0xbe198119, 0x3ea44292, 0x3d3cfc26, 0x3e0b0c38, 0xbe2a6eaa,
0xbdee377c, 0x3d8c79a9, 0x3cb2aedd, 0x3ddb9885, 0xbe238eaf, 0xbde48ec9,
0x3e3951bc, 0x3cb1df06, 0x3d89af34, 0xbe15c676, 0xbde42498, 0x3c0f48aa,
0xbc2b888c, 0x3d376f4d};
uint32_t cell_out1_shape[] = {1, 1, 2, 5};
uint32_t cell_out1_values[] = {0xbe89ae92, 0xbe286430, 0x3e9ceb6f, 0x3d86d03b,
0x3dffb46f, 0xbe804ee8, 0xbe24adf4, 0x3c7cc930,
0xbd0309c4, 0x3da7c588};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, c01_shape, (void *)c01_values,
weights1_shape, (void *)weights1_values, biases1_shape,
(void *)biases1_values, hidden_weights1_shape,
(void *)hidden_weights1_values, hidden_biases1_shape,
(void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, cell_out1_shape, (void *)cell_out1_values,
is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
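/*
 * Illustrative note: each dual-layer test chains two test_layer() calls. The
 * all-timestep output of the first layer (all_ts_out0) becomes the input of
 * the second, and is_layer_bidir[0] is forwarded as is_prev_layer_bidir,
 * which selects the PREV_LAYER_BIDIR vs. PREV_LAYER_UNI concatenation flag
 * when the second layer's weights are stickified.
 */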
void lstm_fwd_to_bidir() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {false, true};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {1, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4};
uint32_t c00_shape[] = {1, 2, 4};
uint32_t c00_values[] = {0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0};
uint32_t weights0_shape[] = {1, 4, 4};
uint32_t weights0_values[] = {
0x3e493898, 0x3dcbca78, 0xbeee948c, 0x3dbfaa08, 0x3ed41bd0, 0xbede9cf8,
0x3ee3743e, 0xbdac80c8, 0x3edec5e8, 0x3d3c6690, 0x3ec2a6f0, 0xbda882b8,
0x3ee1e222, 0xbea027ac, 0xbeff5dfe, 0xbe6a5f1c, 0x3dbed0f8, 0x3e67aa8c,
0x3e8c896e, 0x3e9ed100, 0xbec67a6c, 0x3e4de7f8, 0xbd813f20, 0x3ef5cf48,
0xbeb16e18, 0xbe97a46a, 0x3c9c6440, 0xbec54796, 0xbe843ed2, 0x3e1aadc8,
0x3ded4400, 0xbe3ba1ec, 0x3e44e48c, 0x3eb7435c, 0x3e7fa638, 0x3ef0d4f2,
0xbe97a134, 0x3e3f7148, 0x3dd65318, 0x3eac7f54, 0x3e3bb1dc, 0xbefd5f4a,
0xbec7b396, 0xbe5f3eb0, 0x3e817616, 0xbea61100, 0xbe9368e8, 0xbe00dcd4,
0xbef3dd78, 0xbce1b020, 0xbe9bc938, 0xbdfedb88, 0xbc133e80, 0x3d99bfa0,
0x3ee84968, 0x3cb8d280, 0xbec0c878, 0xbe51adf4, 0x3eaf6fd0, 0x3d956718,
0xbec577a2, 0x3e97e798, 0xbed7e164, 0x3df2ddd0};
uint32_t biases0_shape[] = {1, 4};
uint32_t biases0_values[] = {0xbed2f700, 0x3e8ab64c, 0x3ecb4226, 0x3eaf295c,
0x3e7604cc, 0x3e34d140, 0xbdf63f00, 0xbdd75a50,
0xbeb493ac, 0x3cb6ae60, 0xbeb47690, 0xbe8cec88,
0xbe952c30, 0x3e2ef934, 0xbe988dc4, 0xbc32ba00};
uint32_t hidden_weights0_shape[] = {1, 4, 4};
uint32_t hidden_weights0_values[] = {
0x3c63ac80, 0x3ef27eba, 0xbee5f866, 0xbe7e7cdc, 0xbd30adc0, 0x3ea29306,
0xbe72ba40, 0xbec42d02, 0x3dcf3d10, 0x3ef30cc4, 0x3eae4fce, 0xbeef9400,
0x3ea2c660, 0xbd141d60, 0xbb546b00, 0xbed810ec, 0x3e48ec5c, 0xbef59156,
0xbe331d5c, 0xbea6c676, 0x3cf559e0, 0xbe97bba0, 0xbed1d2ba, 0xbcd23440,
0xbe79d1cc, 0xbe002b3c, 0xbdd9d200, 0x3eb74200, 0x3e7245d4, 0xbe7966ec,
0x3ddae2f8, 0xbd5288f0, 0xbdcfb470, 0x3e9fb02e, 0xbdc53cf0, 0xbda03c28,
0x3e8c6456, 0xbec8528a, 0xbdc90e10, 0x3bd3e180, 0x3e8a6774, 0xbdd87bf0,
0xbee5b8ba, 0xbe6896b8, 0xbef6e502, 0xbe4f9a1c, 0xbedd0a44, 0x3e40deb8,
0xbee66a3a, 0x3ee72b36, 0xbd6c53f0, 0x3d5bc2b0, 0xbd0a36c0, 0x3e396c38,
0xbe648f70, 0xbdd664c0, 0x3ee121a2, 0xbee707ae, 0x3eccb614, 0x3eb6d016,
0xbe50d738, 0x3ea1f874, 0xbecedf54, 0x3e0eec08};
uint32_t hidden_biases0_shape[] = {1, 4};
uint32_t hidden_biases0_values[] = {
0xbe94a63c, 0x3eb32ed6, 0xbe380bcc, 0x3ed7eee0, 0x3cac4fa0, 0x3ea50604,
0xbec183fa, 0xbeafbf44, 0x3e3924a0, 0x3d81aa40, 0xbb73ed00, 0xbdca6d08,
0x3d807a40, 0xbde9d330, 0xbb663e00, 0x3d82a7c0};
uint32_t all_ts_out0_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0x3d7dec3b, 0x3ec321c6, 0xbd33532c, 0xbd2813ae, 0x3d517efa, 0x3ee7cc40,
0xbd18113a, 0x3e9092b4, 0x3df21ddc, 0xbb310d91, 0xbe18e945, 0xbce56e8b,
0x3d359756, 0x3e238b20, 0xbe0c1333, 0x3e987c64, 0x3dd11579, 0xbe65fb8f,
0xbe55c118, 0xbda5413b, 0x3d7d912d, 0xbcbef32e, 0xbe3ba9c3, 0x3e30db74,
0x3dc41819, 0xbd0f17f6, 0xbdbe76a3, 0x3beeb838, 0x3cc76662, 0x3d477816,
0xbe6299de, 0x3e62029a, 0x3d3e7a2d, 0xbe47d763, 0xbe4eef76, 0x3d90d525,
0x3cfd8dfe, 0x3de03933, 0xbdb0d31e, 0x3e51f80d};
uint32_t cell_out0_shape[] = {1, 1, 2, 4};
uint32_t cell_out0_values[] = {0x3f3626d4, 0xbeab179c, 0xbf0f2c94,
0x3e03c918, 0x3e366377, 0x3e49c51e,
0xbea2454d, 0x3eeb4eda};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 =
test_layer(input0, h00_shape, (void *)h00_values, c00_shape,
(void *)c00_values, weights0_shape, (void *)weights0_values,
biases0_shape, (void *)biases0_values, hidden_weights0_shape,
(void *)hidden_weights0_values, hidden_biases0_shape,
(void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, cell_out0_shape,
(void *)cell_out0_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {2, 2, 5};
uint32_t h01_values[] = {0x3fe41f4c, 0x3fc316bd, 0x3e4520a0, 0x3fe7e0c3,
0x3f8930f2, 0x3c305000, 0x3f2385f8, 0x3f78c07a,
0x3feeed13, 0x3f012eea, 0x3fdb2c04, 0x3fa455aa,
0x3faaf233, 0x3f92f487, 0x3f7d3326, 0x3e3365a8,
0x3f600a90, 0x3dd59f00, 0x3ec6cda0, 0x3fd0ec63};
uint32_t c01_shape[] = {2, 2, 5};
uint32_t c01_values[] = {0x3f0d2ed6, 0x3fda7b92, 0x3fb63fe7, 0x3f34b460,
0x3f2b7888, 0x3e7fc438, 0x3fa9348a, 0x3f7f9716,
0x3ef8690c, 0x3ffbc9ad, 0x3e8dd57c, 0x3fe9d898,
0x3f7c78c0, 0x3f95c31c, 0x3fc36a05, 0x3f5e2a0a,
0x3e313c38, 0x3fa56aba, 0x3fcbfe2b, 0x3faf56e1};
uint32_t weights1_shape[] = {2, 4, 5};
uint32_t weights1_values[] = {
0x3ec8c806, 0x3c696e40, 0x3d952498, 0xbdd1716c, 0x3e10c984, 0xbe94b2a7,
0xbe9a126e, 0x3ebdf640, 0xbca2ec60, 0xbc172600, 0xbd506310, 0xbeb9158a,
0xbe1985b8, 0xbe8b7474, 0xbdcdfc84, 0x3e0332e4, 0x3eb682ba, 0x3e06e404,
0x3e1ae0c4, 0xbe8d2560, 0xbebfde02, 0xbed417e7, 0x3ec1d528, 0xbe0751fa,
0xbdfe1e6c, 0xbe77c691, 0x3ea17e98, 0xbe76cf7f, 0x3e1940d8, 0xbe2ab878,
0x3eca9984, 0x3e658114, 0x3e109bc4, 0xbe03c1e6, 0x3de69348, 0x3ed16702,
0x3e878898, 0x3e3b6830, 0x3e8d90bc, 0x3e226e48, 0xbee2f252, 0xbe83e971,
0x3e261b9c, 0x3ebc395c, 0xbed87cba, 0x3e4eb6e0, 0x3e83a64e, 0xbe18ef8e,
0x3df90638, 0x3dc5e080, 0xbe5bef69, 0x3e72843c, 0xbdfff1d8, 0xbe58ace5,
0xbe807ef0, 0xbe98cd9b, 0x3eafbdf8, 0x3e074a8c, 0xbe574539, 0xbecc25a9,
0xbec39102, 0xbea1a3c0, 0xbd3aa670, 0x3c6b9ce0, 0x3e630230, 0x3e55ae7c,
0xbe62d375, 0x3eb037d8, 0xbe0d9648, 0xbea06a9a, 0xbe81b1a9, 0xbebc9a53,
0x3e8db48a, 0xbdc724ec, 0x3ec02c1a, 0x3e5c50f0, 0x3e6ef9a8, 0x3e7d66c4,
0x3d737210, 0xbd472b98, 0x3e9e5868, 0xbe5cf1d4, 0x3db3dd28, 0x3e354500,
0x3e1f0c64, 0xbeaa740b, 0x3da48928, 0xbeaa02a3, 0xbd669538, 0xbe271822,
0x3e3b9ae0, 0xbe6de235, 0x3d987eb0, 0xbebbb5a4, 0x3e2dd3f4, 0xbe19fc78,
0x3dd306b8, 0x3e430d88, 0xbd8d3050, 0x3e987cda, 0xbe5fbd62, 0x3e0b5e64,
0xbe86a497, 0xbaa00c00, 0xbeacb04b, 0x3d21ed48, 0xbddce6cc, 0xbe68730d,
0xbe5ddf86, 0xbeb99f2f, 0x3e84b2c0, 0x3e208298, 0x3ed26dd4, 0x3ee494f2,
0xbead8f69, 0xbd3641c0, 0xbea5ddf2, 0xbdf673cc, 0x3edfa1a6, 0xbb0d5900,
0xbe6f1e1b, 0x3d190310, 0x3e9e8ea0, 0x3e55de1c, 0x3ee46d12, 0x3b9084c0,
0x3d64ba10, 0x3ece54c0, 0xbe5ec821, 0x3e915990, 0x3e3d5c4c, 0xbeb401c3,
0x3e86944e, 0xbebdbf60, 0xbe9e462c, 0x3d0f9250, 0x3d0ff4f8, 0x3d757710,
0xbec4e626, 0x3e332b14, 0xbde97700, 0xbe6f45de, 0x3d7ba930, 0xbe28e040,
0x3ee1a07c, 0xbe95df9f, 0x3dcaf230, 0x3ebc4676, 0x3ee0b168, 0xbe90de80,
0xbe2440b2, 0xbdd20768, 0xbe9acddc, 0xbed93dd3, 0x3daf9920, 0x3dad0a60,
0xbe5de779, 0x3caa1db0, 0xbedb8204, 0xbd1e1828};
uint32_t biases1_shape[] = {2, 5};
uint32_t biases1_values[] = {
0xbede7c01, 0xbdb9cd1c, 0x3e99f81e, 0xbed8b7ed, 0x3ebe51d8, 0xbdc69f70,
0x3ea85d6a, 0xbeb1737d, 0x3d428f68, 0x3ed75422, 0x3dc7ff90, 0xbeae8cee,
0x3e63833c, 0xbecfe0c1, 0xbedc1c4e, 0x3d940df8, 0x3ed2d41c, 0xbe5a9fca,
0x3e23c650, 0xbde59ef4, 0xbe37306c, 0x3e062014, 0x3ca711d0, 0xbece783b,
0x3ebde4ee, 0xbd7bb5b8, 0x3eb1c89c, 0xbe0d071e, 0x3eb8509c, 0xbedd7e2d,
0x3e769414, 0x3ee39938, 0x3e675c3c, 0xbe972362, 0x3ebf18f2, 0xbd840080,
0xbda0df98, 0x3e1469e4, 0x3e33aa40, 0x3eafcf42};
uint32_t hidden_weights1_shape[] = {2, 5, 5};
uint32_t hidden_weights1_values[] = {
0xbee1e40d, 0x3eb76f78, 0x3e9eff90, 0x3ebc5350, 0xbdea86c4, 0xbeb7e0dd,
0xbecc7bce, 0xbead1b0c, 0x3ebb7968, 0x3e497f5c, 0x3eb4c482, 0xbe9c4230,
0xbe5f6ed0, 0x3ea3e864, 0x3ecbffd4, 0x3e77c3f8, 0xbd93fe10, 0xbedd358e,
0x3eb0cbec, 0x3e177f54, 0x3ed6d390, 0xbe94b3e4, 0xbe81ffa5, 0x3ecb999a,
0x3ee4e636, 0xbebd9868, 0x3db92198, 0xbec9e6b4, 0xbec61cd1, 0xbe2ccb44,
0xbecfb148, 0x3e2de8c8, 0xbecee7d6, 0x3ed4086e, 0xbe9d7ac6, 0x3de585b8,
0x3eb61b5a, 0x3ed5ca40, 0x3ed8ea94, 0x3ed8d474, 0xbd0ab3d0, 0x3eb1c556,
0x3e4a7010, 0x3ecebb20, 0xbe44c542, 0xbe6741db, 0xbd891828, 0x3e479f54,
0xbec12893, 0xbe5113e1, 0x3eace9cc, 0xbe20b66c, 0x3e5d42c8, 0x3eb2e8ec,
0xbe1b9f76, 0x3ee4e29a, 0x3ebbff92, 0x3e79a49c, 0xbda4ce70, 0x3e5f481c,
0x3ec83690, 0x3e804fe0, 0xbe8f83dc, 0x3e3248cc, 0xbe9fee66, 0x3eb165e2,
0xbe89ca96, 0x3e036284, 0x3da2aec0, 0x3dcedbf0, 0xbdd1fa20, 0x3ecdd9da,
0xbe2089f6, 0x3e42d780, 0xbe9aebe8, 0xbdc0f07c, 0xbe872d40, 0xbdcbff10,
0x3e8472c6, 0xbe19b22c, 0xbdcc9010, 0xbe1c1d3a, 0xbda475ac, 0xbe0aeb80,
0xbed457d0, 0x3e917fbc, 0x3e667240, 0x3eb369f6, 0xbe97eae2, 0x3e3bb9b4,
0x3caf1b90, 0xbe942d27, 0x3e662ae4, 0xbd084a60, 0x3edee626, 0xbedeee8a,
0x3ed7e74a, 0x3ec2326c, 0x3ebd81c8, 0xbe025ea8, 0x3e6d851c, 0xbddabb30,
0x3ea2c8c8, 0x3d082a00, 0x3edb2580, 0x3e8e5746, 0xbe811315, 0xbd507b08,
0xbe58367b, 0x3eade05a, 0xbe20471c, 0xbe807b12, 0xbe8480f1, 0x3ed26ffe,
0xbe26eada, 0x3e5bbc44, 0xbeb030cc, 0x3eb0f98a, 0x3e3b45e4, 0x3e80b7ea,
0x3d527bf8, 0x3e99b77e, 0x3e926d0a, 0xbeadd2f6, 0xbe8219a4, 0x3e6fec98,
0xbeb25d85, 0x3e66f338, 0x3ed89bd2, 0x3ec8c0ca, 0xbe53d7b8, 0x3ebee346,
0x3d81ac10, 0x3dd8c630, 0xbd97418c, 0xbe618c84, 0xbe4a029f, 0x3ec2d2d6,
0xbedf67a9, 0xbed0b705, 0xbd203aa8, 0x3e2270c4, 0x3d763d80, 0xbe025fa2,
0xbce64df0, 0xbd301208, 0x3ec72844, 0xbe53df41, 0xbe9bf81e, 0x3e8fcc58,
0xbd8f889c, 0xbdf89d64, 0xbe8d6097, 0xbda3e468, 0x3e2a3b28, 0x3ec26bea,
0xbdb0b3c4, 0x3cbbc620, 0xbeaa2909, 0x3ec258fa, 0x3c976030, 0x3d2f4d98,
0x3e5b9460, 0xbe436636, 0x3cf049b0, 0xbea1ef22, 0x3ed3c2e8, 0x3e6328f4,
0x3e24fec4, 0xbe989ba1, 0xbe190f96, 0x3cc42620, 0xbed14480, 0xbea299d4,
0xbe24134e, 0x3dedf310, 0xbe362bda, 0x3d836668, 0xbe8525dc, 0xbe3b1bb2,
0x3e10ce08, 0xbed605fa, 0x3e122c34, 0x3ebc54aa, 0x3ec058f2, 0x3d2a1fb8,
0xbeac7e7c, 0x3d01b298, 0xbeb62674, 0xbe9d91cb, 0x3e2abb28, 0x3e4679ac,
0xbe94746f, 0xbddc5118, 0xbec0490a, 0xbddf28c4, 0xbe879404, 0x3edaf946,
0x3e791bd4, 0xbe4e7f38};
uint32_t hidden_biases1_shape[] = {2, 5};
uint32_t hidden_biases1_values[] = {
0xbe325de8, 0x3dc59638, 0xbeb3c7f8, 0x3e11fa20, 0x3e75d434, 0xbe987408,
0xbd4637a8, 0xbcc4c620, 0x3e4c5720, 0x3e9c8b2a, 0x3cc4e590, 0x3e592a78,
0xbeb798f6, 0xbe03b7b6, 0xbee0d2e5, 0xbd88748c, 0xbc914780, 0x3e9ccdb4,
0xbdf7d0f0, 0x3ec8b9ca, 0x3d7f2f50, 0xbe9933c8, 0xbeb1870e, 0xbe0d48c0,
0x3e4904fc, 0xbd912c2c, 0xbebdb332, 0x3e62e8b8, 0x3e08fc84, 0x3e37f4f4,
0x3ee1dbc6, 0x3e83aa94, 0xbd4e46b0, 0x3e20904c, 0xbee0a324, 0xbe8d3f0b,
0x3e935dc2, 0x3ed8df8e, 0x3d1ef258, 0xbed5df49};
uint32_t all_ts_out1_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0x3ece7c30, 0x3e0ade44, 0x3ea77833, 0x3d8a1542, 0x3ec14d90, 0x3e0f6de1,
0xbcc5b015, 0x3e3d01bd, 0x3ddc92d0, 0x3ed872c6, 0xbdb78493, 0x3e4d107e,
0x3d629dbb, 0x3e81d6dc, 0xbe282ecb, 0xbda067b0, 0x3d215190, 0xbc606d8e,
0x3e9d2a63, 0xbe17d42e, 0x3e4adf52, 0xbc961bd7, 0x3e1f1361, 0xbcd7e3b3,
0x3ea4f150, 0x3b1279df, 0xbd0fc6b2, 0x3ce5e595, 0xbc901e15, 0x3ee2c9e3,
0xbde63869, 0x3e83c0d4, 0x3ddda844, 0x3e83a426, 0xbd9c714e, 0xbdbdde95,
0x3dbfb398, 0x3ca6b42f, 0x3e9b0b9e, 0xbd832c54, 0x3da00061, 0xbd3cde78,
0x3d0c9de8, 0xbdce55e5, 0x3e8c68d8, 0xbd389116, 0xbc3aea77, 0xbcdead41,
0xbdcf5f14, 0x3ec552b2, 0xbdf64801, 0x3ea190d7, 0x3e43aea4, 0x3e90136c,
0x3d1c6530, 0xbdb73956, 0x3e1a74cf, 0x3d91c0d8, 0x3e9f6f28, 0x3d0d3abe,
0xbb2c613b, 0xbd29ebe1, 0xbd1e8311, 0xbe0a8488, 0x3e7dc919, 0xbdadd9a7,
0x3c9a6539, 0xbd8b9ccc, 0xbe0bde89, 0x3eaf4cea, 0xbdcda1c7, 0x3ec24b51,
0x3e9f2fd6, 0x3ea62629, 0x3e1f64a3, 0xbd7dae5d, 0x3e4a2633, 0x3e13742c,
0x3eaf0a3c, 0x3e0b9059, 0xbd110365, 0xbcb1df00, 0xbd897780, 0xbe1bafd2,
0x3e63dc44, 0xbdc911b3, 0x3cd0cccd, 0xbdcc05ba, 0xbe19b36a, 0x3e95fa6d,
0xbd9d8379, 0x3e60bed7, 0x3f00a623, 0x3eb132b8, 0x3e57d24f, 0x3d00a823,
0x3e3e6cd3, 0x3e9d532b, 0x3f01cead, 0x3e8b3a37};
uint32_t cell_out1_shape[] = {1, 2, 2, 5};
uint32_t cell_out1_values[] = {
0xbd5edee1, 0xbd04585f, 0xbe0fefd7, 0xbeab8c8a, 0x3efb1f43,
0xbe19186a, 0x3d1c1ce0, 0xbe48d1fc, 0xbebf107e, 0x3f29bd16,
0xbe65346c, 0x3ec43b22, 0x3dacd5ad, 0x3ee2a358, 0xbebe8bd4,
0xbe48f9a6, 0x3d90ece4, 0xbcaaee82, 0x3f187263, 0xbeb46694};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, c01_shape, (void *)c01_values,
weights1_shape, (void *)weights1_values, biases1_shape,
(void *)biases1_values, hidden_weights1_shape,
(void *)hidden_weights1_values, hidden_biases1_shape,
(void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, cell_out1_shape, (void *)cell_out1_values,
is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
void lstm_bidir_to_bidir() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 4, 5
bool is_layer_bidir[] = {true, true};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {2, 2, 4};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4,
0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0};
uint32_t c00_shape[] = {2, 2, 4};
uint32_t c00_values[] = {0x3f504bc2, 0x3fe33d36, 0x3fd8b70c, 0x3fc21f69,
0x3f0c2aba, 0x3f190c04, 0x3fcbd235, 0x3f32a91c,
0x3ee6ed24, 0x3f9027e4, 0x3f7639bc, 0x3f44af00,
0x3ec25e00, 0x3d230b80, 0x3fe2a3cb, 0x3faee87b};
uint32_t weights0_shape[] = {2, 4, 4};
uint32_t weights0_values[] = {
0x3e44e48c, 0x3eb7435c, 0x3e7fa638, 0x3ef0d4f2, 0xbe97a134, 0x3e3f7148,
0x3dd65318, 0x3eac7f54, 0x3e3bb1dc, 0xbefd5f4a, 0xbec7b396, 0xbe5f3eb0,
0x3e817616, 0xbea61100, 0xbe9368e8, 0xbe00dcd4, 0x3be1d000, 0x3ed3b0f2,
0xbefdbbe6, 0xbe937b62, 0xbdae18e0, 0xbe15aae8, 0x3e671d1c, 0x3e933052,
0xbe86d40a, 0xbe97fc56, 0xbe75e520, 0x3e879224, 0x3d8757d8, 0xbe3d5b84,
0xbeaad6d0, 0x3ec47c50, 0x3e493898, 0x3dcbca78, 0xbeee948c, 0x3dbfaa08,
0x3ed41bd0, 0xbede9cf8, 0x3ee3743e, 0xbdac80c8, 0x3edec5e8, 0x3d3c6690,
0x3ec2a6f0, 0xbda882b8, 0x3ee1e222, 0xbea027ac, 0xbeff5dfe, 0xbe6a5f1c,
0x3d7fab80, 0x3e65a254, 0x3e290ef0, 0x3e83cb7a, 0x3ee54c20, 0xbeb4f724,
0x3ec00ef2, 0xbef7935a, 0x3e9c9930, 0xbe58ff9c, 0xbe24d228, 0x3eb91542,
0xbea1d8c6, 0x3e169740, 0x3a51d400, 0xbed3b130, 0xbef3dd78, 0xbce1b020,
0xbe9bc938, 0xbdfedb88, 0xbc133e80, 0x3d99bfa0, 0x3ee84968, 0x3cb8d280,
0xbec0c878, 0xbe51adf4, 0x3eaf6fd0, 0x3d956718, 0xbec577a2, 0x3e97e798,
0xbed7e164, 0x3df2ddd0, 0x3e39b6d8, 0x3ed270de, 0xbef20a42, 0x3ee07afa,
0xbe2afcc4, 0x3e0b3574, 0x3ddd3bb0, 0xbea63fd0, 0xbe0f13d4, 0xbe72401c,
0xbe8fa9a8, 0xbd68fbd0, 0x3e174298, 0xbe70adfc, 0xbee43e50, 0x3e12af48,
0x3e48ec5c, 0xbef59156, 0xbe331d5c, 0xbea6c676, 0x3cf559e0, 0xbe97bba0,
0xbed1d2ba, 0xbcd23440, 0xbe79d1cc, 0xbe002b3c, 0xbdd9d200, 0x3eb74200,
0x3e7245d4, 0xbe7966ec, 0x3ddae2f8, 0xbd5288f0, 0x3c827de0, 0x3da6bf30,
0xbdea2a18, 0x3e21e080, 0xbeac41fa, 0x3ed46246, 0xbcb62760, 0xbc28fd40,
0xbeceee2c, 0xbe2ba4a8, 0xbe9bea52, 0xbde64cc0, 0x3ecc0d98, 0x3e16cff8,
0x3e2d28ac, 0xbe9dce58};
uint32_t biases0_shape[] = {2, 4};
uint32_t biases0_values[] = {
0xbe94a63c, 0x3eb32ed6, 0xbe380bcc, 0x3ed7eee0, 0x3e8a8150, 0x3ef02ee8,
0x3ecd1648, 0xbee49ea0, 0x3cac4fa0, 0x3ea50604, 0xbec183fa, 0xbeafbf44,
0xbead3520, 0xbefc8dba, 0xbecd9510, 0x3eca1ab6, 0x3e3924a0, 0x3d81aa40,
0xbb73ed00, 0xbdca6d08, 0xbeaeb3ce, 0xbda575e8, 0xbea64132, 0x3eb1c3f8,
0x3d807a40, 0xbde9d330, 0xbb663e00, 0x3d82a7c0, 0xbec189ba, 0xbe79ce38,
0xbef751c4, 0xbe9157c6};
uint32_t hidden_weights0_shape[] = {2, 4, 4};
uint32_t hidden_weights0_values[] = {
0xbdcfb470, 0x3e9fb02e, 0xbdc53cf0, 0xbda03c28, 0x3e8c6456, 0xbec8528a,
0xbdc90e10, 0x3bd3e180, 0x3e8a6774, 0xbdd87bf0, 0xbee5b8ba, 0xbe6896b8,
0xbef6e502, 0xbe4f9a1c, 0xbedd0a44, 0x3e40deb8, 0x3e31d250, 0xbe85abba,
0x3d2b1290, 0x3eb145b4, 0xbe3ad12c, 0x3ba19380, 0x3d7fb970, 0x3ee6af64,
0x3e425874, 0x3e53b624, 0xbec940fa, 0x3e9676d8, 0x3eaa7c86, 0x3d208490,
0x3d20f2e0, 0x3d893818, 0x3c63ac80, 0x3ef27eba, 0xbee5f866, 0xbe7e7cdc,
0xbd30adc0, 0x3ea29306, 0xbe72ba40, 0xbec42d02, 0x3dcf3d10, 0x3ef30cc4,
0x3eae4fce, 0xbeef9400, 0x3ea2c660, 0xbd141d60, 0xbb546b00, 0xbed810ec,
0x3eb10914, 0xbe77060c, 0x3dc91810, 0x3e4aaa5c, 0xbebe9294, 0x3db7f4e0,
0xbebe13ca, 0xbd80e658, 0x3e51bfac, 0xbe84fb22, 0x3daa7e98, 0xbed1dd9a,
0xbe2c296c, 0x3debef40, 0x3e5a1364, 0xbd9dda90, 0xbee66a3a, 0x3ee72b36,
0xbd6c53f0, 0x3d5bc2b0, 0xbd0a36c0, 0x3e396c38, 0xbe648f70, 0xbdd664c0,
0x3ee121a2, 0xbee707ae, 0x3eccb614, 0x3eb6d016, 0xbe50d738, 0x3ea1f874,
0xbecedf54, 0x3e0eec08, 0x3e6f1c7c, 0x3eff635a, 0x3ec152aa, 0xbdeac2f0,
0xbe7913dc, 0x3ea2818e, 0x3effe6c2, 0xbe33aea0, 0xbed424ec, 0xbeb0f4b2,
0x3edfd858, 0x3ed23042, 0xbedc23ca, 0x3e4850f4, 0x3ec65644, 0x3e8f750a,
0x3e7604cc, 0xbed2f700, 0xbeb493ac, 0xbe952c30, 0x3e34d140, 0x3e8ab64c,
0x3cb6ae60, 0x3e2ef934, 0xbdf63f00, 0x3ecb4226, 0xbeb47690, 0xbe988dc4,
0xbdd75a50, 0x3eaf295c, 0xbe8cec88, 0xbc32ba00, 0xbe9a0e30, 0xbea0746a,
0xbdb84258, 0xbe2dfde0, 0x3ee625fe, 0x3e12e488, 0x3e4753f8, 0x3e79a2f4,
0x3e776090, 0xbe337cec, 0x3db5e280, 0xbeb2cefe, 0x3e8b8e00, 0x3ec806fc,
0x3e59d6f8, 0x3de74688};
uint32_t hidden_biases0_shape[] = {2, 4};
uint32_t hidden_biases0_values[] = {
0xbd130f20, 0x3efd3ec0, 0x3e38f410, 0x3c67f0c0, 0x3ee3a1ba, 0xbe031ab0,
0x3e6147f4, 0x3ee41404, 0xbec83e98, 0xbe862d7a, 0x3eceb7d8, 0xbecfc186,
0x3ed28de2, 0x3ed19a42, 0x3eb74124, 0x3ec5aa22, 0xbeddda26, 0x3e7da22c,
0xbeb65808, 0xbe1156a8, 0x3e296114, 0x3effeaca, 0x3e84c718, 0x3e9f2458,
0xbe2bc8cc, 0xbd97a438, 0x3bb33680, 0x3ed32696, 0xbe33322c, 0x3e75abf4,
0x3d6b5420, 0xbdf48c88};
uint32_t all_ts_out0_shape[] = {5, 2, 2, 4};
uint32_t all_ts_out0_exp_values[] = {
0xbe7a53bc, 0x3e9f7692, 0xbc9ed153, 0x3ee831ab, 0xbe77d13d, 0x3e31f762,
0x3d8ddd1a, 0x3e6762b7, 0xbd9698a5, 0x3e4d00b0, 0xbe496231, 0x3e91c291,
0xbe1c9299, 0x3ed36114, 0xbe45f6fb, 0x3ed3744e, 0xbec68241, 0x3e276d3e,
0xbdd85a96, 0x3ec34484, 0xbed55ee0, 0x3dc4d879, 0x3c76e7a9, 0x3e49a5fe,
0xbd67a64d, 0x3e9192cf, 0xbe51be30, 0x3ea200ca, 0xbe09e0cc, 0x3ed8b4cc,
0xbe37dffa, 0x3eba17c2, 0xbecd3af4, 0x3dfa0768, 0xbe162afb, 0x3e8c28d0,
0xbea2f4b5, 0x3e259552, 0xbd4ff47a, 0x3deba7d9, 0xbcda2de0, 0x3e82d495,
0xbe3a1843, 0x3e90a6e1, 0xbe23555a, 0x3e8f0959, 0xbda17677, 0x3ebd8b84,
0xbf0b5dde, 0x3dc3eca8, 0xbe264ef9, 0x3e0dfb28, 0xbec6e89f, 0x3d867695,
0xbc05f6c6, 0x3cc15a89, 0x3d7d9314, 0x3eeaeca2, 0xbe61c09e, 0x3e8a8119,
0xbdcf0300, 0x3eb7170f, 0xbc19745a, 0x3ec402a2, 0xbf02ead9, 0x3d31e00b,
0xbdf09f2e, 0x3ddd5180, 0xbede6b61, 0x3da42fd6, 0xbdeab752, 0xbd45daf2,
0x3d57812b, 0x3ec346a9, 0xbd12e422, 0x3e53ccf4, 0xbc8ff60e, 0x3ea5b06e,
0x3e9c34b9, 0x3ed3c79c};
uint32_t cell_out0_shape[] = {1, 2, 2, 4};
uint32_t cell_out0_values[] = {
0xbfd5981b, 0x3eab93df, 0xbeb6282e, 0x3e2b0ac4, 0xbf94f0fd, 0x3e9d97ae,
0xbe856685, 0xbdb00498, 0xbe5181d2, 0x3eaf261e, 0xbf2f0982, 0x3fcb5ba8,
0xbf0caf39, 0x3f471fab, 0xbf2da98a, 0x3fb4fadd};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 =
test_layer(input0, h00_shape, (void *)h00_values, c00_shape,
(void *)c00_values, weights0_shape, (void *)weights0_values,
biases0_shape, (void *)biases0_values, hidden_weights0_shape,
(void *)hidden_weights0_values, hidden_biases0_shape,
(void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, cell_out0_shape,
(void *)cell_out0_values, false, is_layer_bidir[0]);
// second layer
uint32_t h01_shape[] = {2, 2, 5};
uint32_t h01_values[] = {0x3fc827a5, 0x3fc7d2ab, 0x3fe27e59, 0x3ea84764,
0x3e9400d4, 0x3f8e916a, 0x3fca1262, 0x3e688b78,
0x3eb894e8, 0x3f6cf872, 0x3fbc6eee, 0x3da5ca40,
0x3f174faa, 0x3fe12bad, 0x3d2fc9e0, 0x3e7666b8,
0x3faea7a3, 0x3ee02d48, 0x3fc8ba6b, 0x3f940f37};
uint32_t c01_shape[] = {2, 2, 5};
uint32_t c01_values[] = {0x3e1a8538, 0x3f756c1c, 0x3fda8620, 0x3faac825,
0x3fa2beb7, 0x3f98b1e4, 0x3f67802a, 0x3d99f2f0,
0x3f724b2e, 0x3fcf0846, 0x3f72e100, 0x3f054054,
0x3f010382, 0x3ff4fbf0, 0x3f96e796, 0x3fdf1f5c,
0x3fb69da2, 0x3f23c3d0, 0x3fdae58c, 0x3f20d682};
uint32_t weights1_shape[] = {2, 8, 5};
uint32_t weights1_values[] = {
0x3e83aa94, 0xbea1a3c0, 0x3c6b9ce0, 0xbebfde02, 0x3ec1d528, 0xbd4e46b0,
0xbe62d375, 0xbe0d9648, 0xbe77c691, 0xbe76cf7f, 0x3e20904c, 0xbebc9a53,
0xbdc724ec, 0x3eca9984, 0x3e109bc4, 0xbee0a324, 0x3e6ef9a8, 0x3d737210,
0x3ed16702, 0x3e3b6830, 0xbec39102, 0xbd3aa670, 0x3e630230, 0xbed417e7,
0xbe0751fa, 0x3e55ae7c, 0x3eb037d8, 0xbea06a9a, 0x3ea17e98, 0x3e1940d8,
0xbe81b1a9, 0x3e8db48a, 0x3ec02c1a, 0x3e658114, 0xbe03c1e6, 0x3e5c50f0,
0x3e7d66c4, 0xbd472b98, 0x3e878898, 0x3e8d90bc, 0x3e7b0bac, 0xbecc0226,
0x3cbe6420, 0xbe927f99, 0x3aa69a00, 0x3e6e6210, 0x3e8ca274, 0x3da1fbe0,
0xbe9eba88, 0x3ecdd426, 0x3e8e02b2, 0x3d3f7208, 0xb9c68000, 0xbd938128,
0x3ee3b00e, 0xbe91fb37, 0x3cd35960, 0x3e13e288, 0xbda1fd74, 0xbe84a2b8,
0x3ee40ec6, 0xbe7d9782, 0x3ed942f8, 0x3e4bb92c, 0x3da325a8, 0xbe87ba02,
0xbe4018f0, 0x3df38580, 0xbe43be48, 0x3d586020, 0x3ee497e8, 0xbe05e5dc,
0xbe27a444, 0x3eb689d4, 0xbe56b587, 0xbedbbe59, 0xbedf0e3e, 0xbe3c776a,
0xbea0aa84, 0xbe1e37de, 0x3ec258fa, 0xbedc1c4e, 0x3ca711d0, 0x3cc4e590,
0x3e11fa20, 0x3cf049b0, 0xbede7c01, 0xbece783b, 0x3e592a78, 0x3e75d434,
0xbe989ba1, 0xbdb9cd1c, 0x3ebde4ee, 0xbeb798f6, 0x3d7f2f50, 0xbe24134e,
0x3e99f81e, 0x3e769414, 0xbe03b7b6, 0xbe9933c8, 0x3dc7ff90, 0xbed8b7ed,
0x3ee39938, 0xbee0d2e5, 0xbeb1870e, 0xbeae8cee, 0x3ebe51d8, 0x3e675c3c,
0xbe325de8, 0xbe0d48c0, 0x3e63833c, 0xbe37306c, 0xbe972362, 0x3dc59638,
0x3e4904fc, 0xbecfe0c1, 0x3e062014, 0x3ebf18f2, 0xbeb3c7f8, 0x3ee1dbc6,
0xbdcf25dc, 0xbeb5b12a, 0xbd48ccd8, 0xbe46414a, 0x3e24bcd8, 0x3d26cb70,
0x3cdf2e90, 0xbe101be0, 0x3eacd6bc, 0xbea06bf2, 0xbea00f51, 0x3eb58a42,
0xbeb81f6b, 0xbe3e8e36, 0xbdc54a50, 0x3ec2e956, 0xbe019774, 0x3941c000,
0xbe9f0e96, 0x3ec6a716, 0xbd6ccb78, 0x3e578a54, 0x3eac49a0, 0x3d8d0aa8,
0xbeafad22, 0xbea7780c, 0x3ec4c2fc, 0x3ec0e9e4, 0x3e893e48, 0xbe05d8ee,
0x3e061770, 0x3e97de7c, 0x3d2ae830, 0xbece70c1, 0x3e163290, 0xbea2cdd5,
0xbe703894, 0x3dd00540, 0x3ecefc06, 0xbec899a2, 0xbdfe1e6c, 0x3e0b5e64,
0xbaa00c00, 0xbde97700, 0x3d7ba930, 0xbe2ab878, 0xbddce6cc, 0xbe5ddf86,
0xbe95df9f, 0x3ebc4676, 0x3de69348, 0x3e208298, 0x3ee494f2, 0xbe2440b2,
0xbe9acddc, 0x3e226e48, 0xbea5ddf2, 0x3edfa1a6, 0x3dad0a60, 0x3caa1db0,
0xbe5fbd62, 0xbe86a497, 0xbeacb04b, 0xbe6f45de, 0xbe28e040, 0x3d21ed48,
0xbe68730d, 0xbeb99f2f, 0x3dcaf230, 0x3ee0b168, 0x3e84b2c0, 0x3ed26dd4,
0xbead8f69, 0xbdd20768, 0xbed93dd3, 0xbd3641c0, 0xbdf673cc, 0xbb0d5900,
0xbe5de779, 0xbedb8204, 0x3df812c8, 0x3ee2c0f8, 0x3dd6ac68, 0x3d6a6440,
0x3e478690, 0xbe9f3858, 0xbe0bfad6, 0x3c8a0b80, 0xbe376674, 0xbde0babc,
0x3d971e50, 0x3e78da1c, 0x3e9124d4, 0xbe1ad584, 0x3e462330, 0x3e462a34,
0xbe02fc74, 0xbdb961b8, 0x3def8690, 0x3ea8f792, 0xbe347690, 0xbd85c98c,
0x3d37b120, 0xbda59be8, 0x3ca89770, 0x3ebe31d8, 0xbd9b37b0, 0xbe2a5a0c,
0x3e95a0ea, 0x3db33be0, 0xbcf119f0, 0xbb361500, 0xbebfc12d, 0x3ccf8430,
0x3c851e40, 0x3e4fc6b8, 0xbe2cff70, 0x3e3950e8, 0x3e2cfbb8, 0x3e9e88a0,
0x3ee1a07c, 0xbedeee8a, 0x3eb369f6, 0xbe19b22c, 0xbd0ab3d0, 0xbe90de80,
0xbe872d40, 0x3e662ae4, 0xbed457d0, 0xbe6741db, 0x3daf9920, 0xbe1c1d3a,
0x3ec2326c, 0x3e3bb9b4, 0x3db92198, 0xbd1e1828, 0x3e667240, 0x3e8472c6,
0x3edee626, 0x3e2de8c8, 0xbdc0f07c, 0xbe942d27, 0xbe0aeb80, 0xbe025ea8,
0x3eb61b5a, 0xbdcc9010, 0x3ed7e74a, 0xbe97eae2, 0xbebd9868, 0x3eb1c556,
0x3e917fbc, 0xbdcbff10, 0xbd084a60, 0xbecfb148, 0xbd891828, 0x3caf1b90,
0xbda475ac, 0x3ebd81c8, 0x3de585b8, 0xbec9e6b4, 0xbe4157cc, 0x3e8ad580,
0xbd7cdcb8, 0x3eaf736e, 0x3e858166, 0xbe736e40, 0x3ee2894e, 0x3dc70f30,
0x3ede9074, 0x3e75fc90, 0x3e478d4c, 0x3db95270, 0x3d74a7f0, 0xbe87d88c,
0x3e7e8034, 0xbec7475c, 0xbcf41780, 0xbdacfd44, 0xbce470b0, 0xbeb5ea1e,
0x3beaf1c0, 0x3ee163d0, 0xbee4efd6, 0xbe377cb8, 0x3d405f70, 0xbe529b09,
0xbe43b460, 0x3cbb9700, 0xbed30845, 0x3ed51bde, 0x3e97214e, 0xbd12c9a0,
0xbc590b60, 0xbea69d53, 0xbe7f92d8, 0xbed52ee3, 0xbe488982, 0x3d89c8b8,
0x3ed6e7ce, 0x3ecbb182};
uint32_t biases1_shape[] = {2, 5};
uint32_t biases1_values[] = {
0xbea9c5c8, 0x3dd289d0, 0x3d851878, 0xbe5655f7, 0x3e1f747c, 0xbe134938,
0x3ad25d00, 0xbdb01a08, 0xbd1bbbd0, 0xbeb29254, 0x3e935dc2, 0x3ed8df8e,
0x3d1ef258, 0xbed5df49, 0x3ea0bcaa, 0x3daba420, 0xbe13420a, 0x3e9b1762,
0x3cc83240, 0x3dabe7d8, 0xbecccd1f, 0x3dd09a78, 0xbe91286a, 0xbd4613f0,
0x3d6b9ee0, 0xbb343e80, 0xbebe0edb, 0xbdc50970, 0x3e84b35e, 0xbe9f3779,
0x3ec31294, 0xbe5a7ee4, 0x3e92b048, 0xbd68b2e8, 0xbe597ddb, 0xbe9a5704,
0xbddfa3c0, 0xbdfb13dc, 0x3d9b0ca0, 0x3d980d78};
uint32_t hidden_weights1_shape[] = {2, 5, 5};
uint32_t hidden_weights1_values[] = {
0x3d81ac10, 0x3dd8c630, 0xbd97418c, 0x3e10ce08, 0xbed605fa, 0x3ec2d2d6,
0xbedf67a9, 0xbed0b705, 0x3d2a1fb8, 0xbeac7e7c, 0x3d763d80, 0xbe025fa2,
0xbce64df0, 0x3e2abb28, 0x3e4679ac, 0xbe53df41, 0xbe9bf81e, 0x3e8fcc58,
0xbddf28c4, 0xbe879404, 0x3ed89bd2, 0x3ec8c0ca, 0x3dedf310, 0xbe362bda,
0x3d836668, 0xbea63515, 0xbed34fff, 0xbeb2cae3, 0xbedc6d5d, 0xbec0db92,
0x3e49d700, 0x3e8c699a, 0x3ead673e, 0x3e9acf32, 0x3ea5bbea, 0x3d79a270,
0x3e92763a, 0x3e0fb304, 0x3ecb49b0, 0xbed82f3e, 0x3e5f5638, 0xbecf279c,
0x3ee267e6, 0x3e8c4992, 0x3dcc9cb0, 0xbee4ae25, 0x3e2dd470, 0xbee1c411,
0x3e983a74, 0xbe95fc4a, 0xbecee7d6, 0x3ed4086e, 0xbe9d7ac6, 0xbe53d7b8,
0x3ebee346, 0x3ed5ca40, 0x3ed8ea94, 0x3ed8d474, 0xbe618c84, 0xbe4a029f,
0x3e4a7010, 0x3ecebb20, 0xbe44c542, 0xbd203aa8, 0x3e2270c4, 0x3e479f54,
0xbec12893, 0xbe5113e1, 0xbd301208, 0x3ec72844, 0xbec61cd1, 0xbe2ccb44,
0x3e6fec98, 0xbeb25d85, 0x3e66f338, 0x3df0bb68, 0xbce89ee0, 0x3ed04f64,
0xbe2a0094, 0x3d93c7f8, 0x3ea117be, 0x3e18bfa8, 0x3e99bb1e, 0xbd4da508,
0x3ddd3e70, 0xbe442dc0, 0x3e0955f0, 0x3ea0fb84, 0xbe7777df, 0x3ec92466,
0x3e531f20, 0x3ebf9b54, 0xbd6c5ae0, 0x3e6a16c8, 0x3e26cc6c, 0xbecafb69,
0x3ee10096, 0xbeb1b1ae, 0x3e20c074, 0xbec8cbb7, 0x3e122c34, 0x3ebc54aa,
0x3ec058f2, 0x3ed2d41c, 0x3ea85d6a, 0x3d01b298, 0xbeb62674, 0xbe9d91cb,
0xbe5a9fca, 0xbeb1737d, 0xbe94746f, 0xbddc5118, 0xbec0490a, 0x3e23c650,
0x3d428f68, 0x3edaf946, 0x3e791bd4, 0xbe4e7f38, 0xbde59ef4, 0x3ed75422,
0xbe8525dc, 0xbe3b1bb2, 0x3d940df8, 0xbdc69f70, 0xbd7bb5b8, 0xbe35e4f4,
0xbed7e492, 0xbebb3390, 0xbe7a2866, 0x3ed07d84, 0x3da22d18, 0x3e316444,
0xbeb70a96, 0x3e185bfc, 0xbee1383b, 0xbe340e34, 0xbe41d6c8, 0x3e8902e6,
0x3ca49640, 0xbee3b077, 0xbd90ea54, 0xbe8b4e16, 0x3e68bf70, 0xbea3a41a,
0x3d6ab290, 0xbed906ca, 0xbe34b29a, 0x3d740020, 0x3dc51748, 0x3eac6c0c,
0x3eb1c89c, 0xbda0df98, 0xbc914780, 0xbd4637a8, 0xbebdb332, 0xbe0d071e,
0x3e1469e4, 0x3e9ccdb4, 0xbcc4c620, 0x3e62e8b8, 0x3eb8509c, 0x3e33aa40,
0xbdf7d0f0, 0x3e4c5720, 0x3e08fc84, 0xbedd7e2d, 0x3eafcf42, 0x3ec8b9ca,
0x3e9c8b2a, 0x3e37f4f4, 0xbd840080, 0xbd88748c, 0xbe987408, 0xbd912c2c,
0xbe8d3f0b, 0xbe8b53f4, 0x3ee473e2, 0x3ec9ef0c, 0x3dcb4df8, 0xbd1b6dd0,
0x3dc99f48, 0x3e952b7e, 0xbee3d029, 0xbe794ffd, 0xbe19d608, 0x3ea0f704,
0xbe80c7e3, 0x3e77fb08, 0x3d81cb10, 0x3e85eb20, 0x3e0d3144, 0x3d1e5550,
0xbe04f4de, 0xbe94a906, 0x3d1deee8, 0x3dc63590, 0x3e69f3cc, 0xbee03730,
0xbeac4f21, 0x3eb2ba24};
uint32_t hidden_biases1_shape[] = {2, 5};
uint32_t hidden_biases1_values[] = {
0x3df84f58, 0x3e9ba3fc, 0x3ec36c40, 0x3eb9b38a, 0xbea9a47a, 0xbd059ee0,
0xbe847d16, 0x3e03a480, 0x3e826528, 0xbe8f0d14, 0xbec763d2, 0xbed21657,
0x3edf9e2a, 0xbde46c64, 0x3cb6fdc0, 0x3e683924, 0xbe9dbc3c, 0xbd014578,
0x3e801014, 0x3ec60e30, 0xbd92bdd4, 0xbe877dfe, 0xbe82d308, 0x3dd941a0,
0x3ed6ece6, 0xbe84df2a, 0x3e4a6960, 0xbd005890, 0xbed843d9, 0xbe8405ca,
0xbeb76e74, 0x3e8a9360, 0xbe6d6e1d, 0xbdb26478, 0x3eb5c09c, 0x3e39b660,
0xbe1b530c, 0x3e9e0b48, 0x3eb50338, 0x3d853c28};
uint32_t all_ts_out1_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out1_exp_values[] = {
0xbdd1fa2b, 0xbd3f4ead, 0x3e0461ac, 0x3ee96b3b, 0x3eb74b45, 0x3e90e20c,
0x3cc3de88, 0xbe6c0e8e, 0x3e8982be, 0x3e9c9c61, 0xbcc322be, 0xbde8fc7c,
0xbd1c8d3e, 0xbd2b6c1d, 0xbe42fb01, 0x3c57f78f, 0xbdc04b1a, 0xbd33a392,
0xbb95a554, 0xbe5066f9, 0xbd9fc1a4, 0xbe477008, 0xbdf132ab, 0x3e9bcf26,
0x3eb93873, 0x3ddaf977, 0xbdd3166c, 0xbe619ffb, 0x3e4adfd0, 0x3e9fccd2,
0xbcc2dbcd, 0xbdf980ee, 0xbccdb7ad, 0xba30c49d, 0xbe42c10d, 0x3b83d9f7,
0xbdcf460e, 0xbd25edf3, 0x3ccf389e, 0xbe583262, 0xbdab0f55, 0xbe7eb2ff,
0xbe4f90b5, 0x3e3ad707, 0x3eaa53e0, 0x3bddcfca, 0xbe1775a7, 0xbe7ea785,
0x3dfb50db, 0x3e94d66d, 0xbd37abbd, 0xbe0e311e, 0x3b99893a, 0x3d3ff63a,
0xbe32a8f4, 0xbc20a1ff, 0xbdd2936b, 0xbca7aae4, 0x3dceaf3e, 0xbe526074,
0xbda8f646, 0xbea1ae04, 0xbe7407c4, 0x3de07121, 0x3eab13d6, 0xbd35fd1d,
0xbe3dd8b8, 0xbe8900d2, 0x3d99c517, 0x3e8fad14, 0xbd509783, 0xbe25d588,
0x3d1331e9, 0x3e1c1ebc, 0xbdde9924, 0x3cc65017, 0xbdb670f0, 0x3b42adb7,
0x3e92ec47, 0xbe359c1f, 0xbdb2ab28, 0xbea20225, 0xbe84c3cf, 0x3d93e6e6,
0x3e9ee28e, 0xbd8f777c, 0xbe39d1e1, 0xbe997113, 0x3d0e7f2b, 0x3e6fee78,
0xbc9bfb45, 0xbe70f085, 0x3da7354e, 0x3e8801bd, 0x3db26c6d, 0x3e37c0bd,
0xbc2f97f7, 0x3d7034b4, 0x3e824266, 0xbde7ae61};
uint32_t cell_out1_shape[] = {1, 2, 2, 5};
uint32_t cell_out1_values[] = {
0xbe64bd12, 0xbf184520, 0xbf34c5c0, 0x3e25569a, 0x3f2f7ae7,
0xbe274afb, 0xbeabac90, 0xbf47b603, 0x3db135ad, 0x3f08b30b,
0xbd87fabc, 0xbe95e9f6, 0xbd845920, 0xbd816afa, 0xbebb7434,
0x3d1341a1, 0xbe8512e6, 0xbd941748, 0xbbe9b318, 0xbeb82cf6};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, c01_shape, (void *)c01_values,
weights1_shape, (void *)weights1_values, biases1_shape,
(void *)biases1_values, hidden_weights1_shape,
(void *)hidden_weights1_values, hidden_biases1_shape,
(void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, cell_out1_shape, (void *)cell_out1_values,
is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
void lstm_bidir_to_fwd() {
// num_timesteps = 5
// num_batches = 2
// num_features = 4
// num_hidden = 5, 4
bool is_layer_bidir[] = {true, false};
// first layer
uint32_t input0_shape[] = {5, 2, 4};
uint32_t input0_values[] = {
0x3f80f554, 0x3eed5744, 0x3fe9598b, 0x3fde3340, 0x3fb14cbd, 0x3f3b5a0a,
0x3f82893d, 0x3e5414c8, 0x3f8b5bf7, 0x3f3c425a, 0x3fa6aeeb, 0x3f99290e,
0x3ffa48dc, 0x3fd4c5a9, 0x3fb4c3ba, 0x3f768450, 0x3f1acb50, 0x3eccc9d0,
0x3fd6c6c6, 0x3fb7bd3f, 0x3f230434, 0x3e2daec8, 0x3f9a57a9, 0x3e80dd48,
0x3f94a1a8, 0x3f64e95e, 0x3dc195b0, 0x3ff6bde7, 0x3fd094b3, 0x3fa067b8,
0x3fb1e4f7, 0x3e0b4360, 0x3fd2f78d, 0x3fbaec30, 0x3fd96d0d, 0x3ff7e13b,
0x3fcab802, 0x3e0fc588, 0x3f0dc4a2, 0x3f03ec80};
uint32_t h00_shape[] = {2, 2, 5};
uint32_t h00_values[] = {0x3f72895c, 0x3fc19f9d, 0x3f54b050, 0x3ff7834f,
0x3fdc7d0d, 0x3fc1fce3, 0x3ebcf5b4, 0x3ed3cdb4,
0x3fb8c472, 0x3f849e59, 0x3eb88b80, 0x3bc03f00,
0x3f1a65ee, 0x3f5d6a8e, 0x3ea8b604, 0x3fcb5de0,
0x3f504bc2, 0x3fe33d36, 0x3fd8b70c, 0x3fc21f69};
uint32_t c00_shape[] = {2, 2, 5};
uint32_t c00_values[] = {0x3f0c2aba, 0x3f190c04, 0x3fcbd235, 0x3f32a91c,
0x3ee6ed24, 0x3f9027e4, 0x3f7639bc, 0x3f44af00,
0x3ec25e00, 0x3d230b80, 0x3fe2a3cb, 0x3faee87b,
0x3f1b63b4, 0x3e2f90c0, 0x3e04e860, 0x3df0eef0,
0x3f4d0d62, 0x3fef4e7c, 0x3f68732e, 0x3fd013d6};
uint32_t weights0_shape[] = {2, 4, 5};
uint32_t weights0_values[] = {
0x3ed76812, 0xbeda1e9d, 0xbcc9dc90, 0xbe8b56d8, 0xbde3f398, 0x3e9a494e,
0xbc03b300, 0x3d898450, 0x3ecfc37a, 0x3ca54f60, 0xbe47ad20, 0xbeac6e30,
0xbe3b8b06, 0x3e9cea58, 0x3d85a140, 0xbde68434, 0xbeb09ec1, 0x3e87de1e,
0xbec116de, 0x3dd939f0, 0x3d190310, 0x3e9e8ea0, 0x3e55de1c, 0x3ee46d12,
0x3eace9cc, 0x3d64ba10, 0x3ece54c0, 0xbe5ec821, 0x3e915990, 0x3ee4e29a,
0xbeb401c3, 0x3e86944e, 0xbebdbf60, 0xbe9e462c, 0x3ec83690, 0x3d0ff4f8,
0x3d757710, 0xbec4e626, 0x3e332b14, 0x3eb165e2, 0xbed56486, 0x3dab6e00,
0x3e301b34, 0x3ea3ea60, 0x3e64a8e0, 0x3ecb70ec, 0xbd9a4a9c, 0xbe879f2a,
0x3e2b3b3c, 0x3dbfb2a0, 0x3eae1a26, 0xbd96b870, 0x3e27e118, 0xbee29f7d,
0xbeb29e53, 0xbee46847, 0xbe51a0d6, 0x3e67965c, 0xbe9488c8, 0xbe83d8ea,
0xbe5cf1d4, 0x3db3dd28, 0x3e354500, 0x3e1f0c64, 0xbe6f1e1b, 0x3da48928,
0xbeaa02a3, 0xbd669538, 0xbe271822, 0x3b9084c0, 0xbe6de235, 0x3d987eb0,
0xbebbb5a4, 0x3e2dd3f4, 0x3e3d5c4c, 0x3dd306b8, 0x3e430d88, 0xbd8d3050,
0x3e987cda, 0x3d0f9250, 0x3e33b614, 0xbedba477, 0xbe20347e, 0xbe952b16,
0x3c4ba340, 0x3cdb72e0, 0xbe87b6cc, 0xbebbabe9, 0xbcbc0320, 0xbd1e06b8,
0xbe5f7204, 0xbde54684, 0xbdc2d30c, 0x3ea3e928, 0x3db95c18, 0x3e58b204,
0xbe5f126c, 0x3dc3c730, 0xbd3c4ee0, 0x3e91971e, 0xbdd1fa20, 0xbe89ca96,
0xbe8f83dc, 0xbda4ce70, 0xbe1b9f76, 0xbe20b66c, 0x3ecdd9da, 0x3e036284,
0x3e3248cc, 0x3e5f481c, 0x3ebbff92, 0x3e5d42c8, 0xbe2089f6, 0x3da2aec0,
0xbe9fee66, 0x3e804fe0, 0x3e79a49c, 0x3eb2e8ec, 0x3e42d780, 0x3dcedbf0,
0x3ed8e4e6, 0xbecdb113, 0xbe639ee9, 0xbdb9c6e0, 0x3e8ed458, 0x3e916930,
0xbe591a24, 0xbeaf7709, 0x3e7b2408, 0xbeb32c7f, 0x3ed963f2, 0x3e9be8be,
0xbed64902, 0x3e7795b4, 0xbdc1a118, 0xbd047a58, 0xbb3dfe00, 0xbec14162,
0xbedcd447, 0xbe39af54, 0xbe9aebe8, 0x3e77c3f8, 0xbe9c4230, 0xbead1b0c,
0x3ebc5350, 0xbee1e40d, 0x3ed6d390, 0xbd93fe10, 0xbe5f6ed0, 0x3ebb7968,
0xbeb7e0dd, 0x3eb76f78, 0xbe94b3e4, 0xbedd358e, 0x3ea3e864, 0x3eb4c482,
0xbecc7bce, 0x3e9eff90, 0xbe81ffa5, 0x3eb0cbec};
uint32_t biases0_shape[] = {2, 5};
uint32_t biases0_values[] = {
0xbe18ef8e, 0xbdfff1d8, 0x3e074a8c, 0x3ebc395c, 0x3df90638, 0x3eb037d8,
0x3e8db48a, 0x3e7d66c4, 0x3c6b9ce0, 0xbe0d9648, 0xbe83e971, 0x3e83a64e,
0x3e72843c, 0x3eafbdf8, 0x3e261b9c, 0xbea1a3c0, 0xbe62d375, 0xbebc9a53,
0x3e6ef9a8, 0xbd3aa670, 0xbe58ace5, 0xbe574539, 0xbed87cba, 0x3dc5e080,
0xbe807ef0, 0xbdc724ec, 0x3d737210, 0x3e630230, 0xbea06a9a, 0x3ec02c1a,
0xbecc25a9, 0x3ec8c806, 0xbe94b2a7, 0xbd506310, 0x3e0332e4, 0xbd472b98,
0xbebfde02, 0xbe77c691, 0x3eca9984, 0x3ed16702};
uint32_t hidden_weights0_shape[] = {2, 5, 5};
uint32_t hidden_weights0_values[] = {
0x3e21ba5c, 0x3eb5ccc2, 0xbe7c17a9, 0x3c9a1ea0, 0x3ea04420, 0xbddc3fcc,
0x3e9cab54, 0xbe856c8e, 0x3e9399fc, 0xbe249da6, 0xbdc09e0c, 0xbea1834a,
0x3e1c8044, 0xbead15e7, 0x3ec122ee, 0xbebcb154, 0x3ca36520, 0xbe8872c0,
0xbe9d3169, 0x3e2598d4, 0x3e7822c0, 0xbea16940, 0xbc1fdba0, 0xbe84f4be,
0x3d67f3b0, 0xbeadd2f6, 0xbe8219a4, 0xbe190f96, 0x3cc42620, 0xbed14480,
0x3edb2580, 0xbd8f889c, 0xbdf89d64, 0xbe8d6097, 0xbda3e468, 0x3eade05a,
0x3ec26bea, 0xbdb0b3c4, 0x3cbbc620, 0xbeaa2909, 0xbe26eada, 0x3c976030,
0x3d2f4d98, 0x3e5b9460, 0xbe436636, 0x3e80b7ea, 0xbea1ef22, 0x3ed3c2e8,
0x3e6328f4, 0x3e24fec4, 0xbdb06a40, 0x3bbd8300, 0x3ec95d16, 0x3e90def0,
0x3d448f50, 0xbdb3d438, 0xbe5008a0, 0xbe3acaf4, 0xbd5360c8, 0xbdbfc268,
0xbecd7820, 0x3e2c8218, 0x3ecec37e, 0xbe4c6e38, 0x3ea38344, 0xbec5b44b,
0xbece16e2, 0x3e25d8dc, 0x3eb7196c, 0x3dffaaa8, 0xbd8f518c, 0xbcf73e90,
0xbecea3b7, 0xbeb90843, 0x3e5c0bbc, 0x3ecb999a, 0x3ee4e636, 0x3d527bf8,
0x3e99b77e, 0x3e926d0a, 0xbdea86c4, 0x3e6d851c, 0xbddabb30, 0x3ea2c8c8,
0x3d082a00, 0x3e497f5c, 0x3e8e5746, 0xbe811315, 0xbd507b08, 0xbe58367b,
0x3ecbffd4, 0xbe20471c, 0xbe807b12, 0xbe8480f1, 0x3ed26ffe, 0x3e177f54,
0x3e5bbc44, 0xbeb030cc, 0x3eb0f98a, 0x3e3b45e4, 0xbb5a2c80, 0x3d69b920,
0xbd0388a0, 0x3e62db50, 0x3ba04b00, 0xbdb50e24, 0xbeb31aa8, 0x3ee28262,
0xbea317e4, 0x3ebcdbe4, 0x3d65d3e0, 0xbe700636, 0x3e256d64, 0xbe01fea6,
0x3d64ada0, 0xbdd123b0, 0x3eb8e4f2, 0x3c4f7420, 0xbe19a60a, 0x3ecd16fc,
0xbb4def80, 0xbeb9d294, 0xbec66e3b, 0xbd87a1dc, 0x3e8c10dc, 0xbea299d4,
0xbe24134e, 0xbedc1c4e, 0x3ebe51d8, 0x3ebde4ee, 0x3e2a3b28, 0x3dc7ff90,
0xbede7c01, 0xbe37306c, 0x3e769414, 0x3ec258fa, 0xbeae8cee, 0xbdb9cd1c,
0x3e062014, 0x3ee39938, 0x3cf049b0, 0x3e63833c, 0x3e99f81e, 0x3ca711d0,
0x3e675c3c, 0xbe989ba1, 0xbecfe0c1, 0xbed8b7ed, 0xbece783b, 0xbe972362,
0xbe90c29a, 0x3e1735dc, 0xbedd7037, 0xbe71302b, 0xbe295dd2, 0x3e4d6418,
0x3eabc840, 0x3ea58b16, 0x3d721bf0, 0xbee2f252, 0xbea1dc41, 0xbe136b9a,
0xbebd57dc, 0x3ebd57a4, 0x3e4eb6e0, 0xbe4216de, 0x3a3bae00, 0x3bc9f900,
0xbe05dde4, 0xbe5bef69, 0x3e06b148, 0x3e6bc304, 0xbd9bb79c, 0xbe87f0ac,
0xbe98cd9b, 0x3ebf18f2, 0xbee0d2e5, 0x3e75d434, 0x3e4904fc, 0xbee0a324,
0x3cc4e590, 0xbe325de8, 0x3d7f2f50, 0x3ee1dbc6, 0xbec39102, 0x3e592a78,
0x3dc59638, 0xbe9933c8, 0x3e83aa94, 0x3e55ae7c, 0xbeb798f6, 0xbeb3c7f8,
0xbeb1870e, 0xbd4e46b0, 0xbe81b1a9, 0xbe03b7b6, 0x3e11fa20, 0xbe0d48c0,
0x3e20904c, 0x3e5c50f0};
uint32_t hidden_biases0_shape[] = {2, 5};
uint32_t hidden_biases0_values[] = {
0x3ebdf640, 0xbe1985b8, 0x3e06e404, 0xbdd1716c, 0xbca2ec60, 0xbe76cf7f,
0x3e109bc4, 0x3e3b6830, 0xbe0751fa, 0x3e1940d8, 0x3c696e40, 0xbe9a126e,
0xbeb9158a, 0x3eb682ba, 0x3d952498, 0xbed417e7, 0x3ea17e98, 0x3e658114,
0x3e878898, 0x3ec1d528, 0xbe8b7474, 0x3e1ae0c4, 0x3e10c984, 0xbc172600,
0xbdcdfc84, 0xbe03c1e6, 0x3e8d90bc, 0xbdfe1e6c, 0xbe2ab878, 0x3de69348,
0xbe8d2560, 0x3e9e5868, 0xbeaa740b, 0x3e3b9ae0, 0xbe19fc78, 0x3e226e48,
0xbe5fbd62, 0x3d21ed48, 0x3e84b2c0, 0xbd3641c0};
uint32_t all_ts_out0_shape[] = {5, 2, 2, 5};
uint32_t all_ts_out0_exp_values[] = {
0x3dd6eff8, 0xbda76b31, 0x3c6ef7c1, 0x3a8b388f, 0x3dae9ed6, 0x3e89e8f4,
0xbb4586cb, 0xbc9f3675, 0x3cc8b2ea, 0x3dfe784b, 0x3dbb39fe, 0x3edafae8,
0xbd3c005a, 0xbe072b73, 0x3ef9a34c, 0xbc786331, 0x3eafda4e, 0xbdad66db,
0xbe976dfa, 0x3ee8b575, 0x3c002849, 0xbe597b0f, 0xbcc51dc1, 0x3d32e3f7,
0x3e51f770, 0x3e959d05, 0xbe4629d9, 0xbd0f892a, 0xbb62976b, 0x3de59bf1,
0x3dac9b77, 0x3ee69149, 0xbd32d750, 0xbe094cd1, 0x3f0894c9, 0xbb945ae6,
0x3ec70e1c, 0xbd0701f9, 0xbe079847, 0x3f19f4f5, 0xbdb2f5c6, 0xbe6a4e4b,
0xbd405672, 0x3dd3f4cc, 0x3e178532, 0x3b19974b, 0xbe5f5abf, 0xbdc69fcb,
0x3df1954a, 0x3d01bdbf, 0x3e1c36e6, 0x3ec6ffdf, 0xbbd5b4c9, 0xbd7a61de,
0x3f084cd4, 0xbbf96418, 0x3e21a121, 0x3ca90e88, 0xbe61fed1, 0x3f0518e3,
0xbd13ed8d, 0xbe3bded3, 0xbd99b60d, 0xbd102e12, 0x3dfeec81, 0xbe1233ea,
0xbe61b886, 0xbd95006c, 0x3dfef1ce, 0xbd3d39b1, 0x3e843fa8, 0x3e94e115,
0x3d84b656, 0x3cbd3390, 0x3f0ea868, 0xbc26d2c6, 0x3e28e672, 0x3d9b7799,
0xbc5c9196, 0x3f3aef4b, 0xbd8473f6, 0xbe51d0bc, 0xbd172f10, 0xbb8eeb58,
0x3dcde949, 0xbddf3a74, 0xbe4f4c2e, 0xbe02e647, 0xbca72b3d, 0xbdb6fbf5,
0x3e44f9ce, 0x3ee3b9b2, 0x3d5241b9, 0x3d90a145, 0x3ee73202, 0xbc911f4e,
0xb9c44b8a, 0x3e04e5ab, 0x3e8f7fe2, 0x3f11f168};
uint32_t cell_out0_shape[] = {1, 2, 2, 5};
uint32_t cell_out0_values[] = {
0xbdb0381d, 0xbec713e6, 0xbf77a646, 0xbc19d620, 0x3e8fb6a4,
0xbe499947, 0xbecf624a, 0xbf4b69e0, 0xbd2dd5de, 0xbe1f50a7,
0x3e71bac2, 0x3fe517ec, 0xbdfa26c5, 0xbedb0254, 0x3f1f2b76,
0xbd5f952c, 0x3f279d82, 0xbe9c2544, 0xbf4a2172, 0x3f1eaed8};
zdnn_ztensor *input0 =
alloc_ztensor_with_values(input0_shape, ZDNN_3DS, test_datatype,
NO_CONCAT, false, (void *)input0_values);
zdnn_ztensor *all_ts_out0 =
test_layer(input0, h00_shape, (void *)h00_values, c00_shape,
(void *)c00_values, weights0_shape, (void *)weights0_values,
biases0_shape, (void *)biases0_values, hidden_weights0_shape,
(void *)hidden_weights0_values, hidden_biases0_shape,
(void *)hidden_biases0_values, all_ts_out0_shape,
(void *)all_ts_out0_exp_values, cell_out0_shape,
(void *)cell_out0_values, false, is_layer_bidir[0]);
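// The all-timestep hn output of the first layer (all_ts_out0) becomes the
// input tensor of the second layer below; is_layer_bidir[], defined earlier
// in this driver, selects each layer's direction.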
// second layer
uint32_t h01_shape[] = {1, 2, 4};
uint32_t h01_values[] = {0x3f32172c, 0x3f9edf37, 0x3f2645a8, 0x3fdcb8f3,
0x3fcb4487, 0x3fc0f8ba, 0x3da5dda0, 0x3fa27159};
uint32_t c01_shape[] = {1, 2, 4};
uint32_t c01_values[] = {0x3f805978, 0x3fbe03f3, 0x3ae02000, 0x3cbf1e40,
0x3fe08930, 0x3fe7c408, 0x3fe105ea, 0x3f809e08};
uint32_t weights1_shape[] = {1, 10, 4};
uint32_t weights1_values[] = {
0xbe2bd65c, 0x3dcefba8, 0xbedd7f20, 0x3e861f28, 0xbeed681e, 0x3e426fc0,
0x3eed0f5e, 0xbe6cd8e8, 0x3e51e224, 0x3ecb9a06, 0x3ef28514, 0xbe7c2bdc,
0x3ef9356c, 0x3ec6c0fc, 0x3ee721da, 0xbd332440, 0xbe11c200, 0xbd9946b0,
0xbed7f530, 0xbd44da50, 0xbed3f95a, 0xbee1bb7e, 0xbe413088, 0xbec76b22,
0xbee83510, 0xbee753d8, 0xbeb0114a, 0x3ed56b46, 0x3e004e90, 0x3eef0648,
0x3ef26c56, 0xbe61dab4, 0xbd1b12f0, 0x3e625510, 0xbe5bff04, 0x3e359d30,
0xbe8146d8, 0x3e5f2f40, 0xbe69c184, 0x3edeaa24, 0xbe0282c4, 0xbead1388,
0xbdd7b678, 0xbde41328, 0xbea7904e, 0x3cbe3200, 0xbde4b548, 0xbdb7df18,
0xbe37a3e0, 0xbe3ccf20, 0x3ea2ac3c, 0x3ec89742, 0x3dc17718, 0x3efb36e4,
0x3cc3c6c0, 0x3e80aae6, 0xbe85c1f4, 0xbef2e226, 0xbef93ece, 0x3ed91e6a,
0x3de2e688, 0xbef56ad2, 0xbe9721d8, 0x3e9414ee, 0xbdead1c8, 0x3efc4230,
0xbe2e8a7c, 0xbe1b5134, 0xbe7818b0, 0xbea1f7f8, 0x3e80d2ca, 0xbea9d954,
0x3d8caec8, 0x3dc45320, 0xbea5aa8e, 0xbd1860a0, 0x3ed27f84, 0xbd30c140,
0x3ef1632e, 0x3ed3e00e, 0x3e811ae2, 0x3ee072e2, 0xbe4bac78, 0xbe94dd26,
0x3d90fa50, 0xbda91c00, 0xbeef490e, 0x3ed28f66, 0x3ed9d1c4, 0xbee959b2,
0xbec0dab0, 0xbecba66e, 0x3d89a708, 0xbd00be80, 0x3e5de6f0, 0xbdf65258,
0xbe6ce154, 0x3ea0c574, 0xbe9794be, 0x3e8b418a, 0x3ef22d06, 0x3e050490,
0x3d92e8e0, 0xbe51317c, 0x3df25c60, 0x3e21e58c, 0x3e236d10, 0x3ed70d0e,
0xbef9c638, 0x3d3e3450, 0x3d1101a0, 0xbeb02b06, 0xbe11c318, 0x3e3ee218,
0xbea5fa40, 0xbed6fb44, 0xbeae60fe, 0xbdf97fe8, 0x3ef4d1f0, 0xbe66dee0,
0x3da587b0, 0xbd8cb5c8, 0xbd988fc8, 0xbda24ed0, 0x3eebb6a8, 0x3ec6c4a6,
0xbca26d60, 0xbed4174e, 0xbe746de4, 0xbe1dac84, 0x3eaf4fca, 0x3e7db128,
0x3e371b0c, 0x3ece1200, 0xbe0a8890, 0x3e1927cc, 0xbe005cac, 0xbef7a2f6,
0x3ee06b10, 0x3e4dab88, 0xbdde1128, 0xbd939528, 0xbeaa72aa, 0xbe9deb0a,
0x3ebc3cd8, 0xbdb3dca8, 0xbd5d9d20, 0x3ea4c2ae, 0xbec6657a, 0x3e25ee78,
0xbcdc0000, 0x3ef278c0, 0x3d598660, 0x3e48df24, 0x3e6475a0, 0x3d31b530,
0x3ef0bea8, 0x3ec48fa6, 0x3eaf0566, 0xbeef1dcc};
uint32_t biases1_shape[] = {1, 4};
uint32_t biases1_values[] = {0x3e382eac, 0xbeb35b5c, 0xbddc93c8, 0x3ede19b8,
0x3d9db078, 0x3e997152, 0xbee6ceb4, 0x3ee76a6a,
0xbec4697e, 0xbe15a55c, 0x3e27ed08, 0xbee0471c,
0x3e8c56b8, 0x3e85429c, 0x3e9ec5ca, 0xbea3364a};
uint32_t hidden_weights1_shape[] = {1, 4, 4};
uint32_t hidden_weights1_values[] = {
0xbedeecba, 0x3ccc9720, 0x3ecf9ed2, 0xbe92441c, 0xbeeae27c, 0x3e0acf3c,
0xbebdaa84, 0x3df2e668, 0x3efa0328, 0x3eae02ee, 0xbda40fe0, 0x3ef04b3c,
0xbdff6298, 0x3eda7d48, 0xbe977c1e, 0xbecd1526, 0x3eb3b59c, 0xbe6fa27c,
0xbea24a9c, 0xbe74491c, 0xbebdcfc0, 0x3e3246b0, 0xbd5d7530, 0x3ea400ba,
0x3deb6398, 0xbee4f98a, 0x3d83b748, 0xbd821528, 0x3d94ce30, 0x3de939c8,
0x3eda1908, 0xbe7329bc, 0x3e9aeeae, 0xbde79930, 0xbd845f50, 0xbecb234c,
0xbe84ba3c, 0x3d3a7b70, 0xbebb3c68, 0x3cf98660, 0xbdc772e8, 0xbeb2f3cc,
0x3e15eb3c, 0x3ecaf7cc, 0x3ecb3492, 0x3ed9eaec, 0xbeb6053c, 0xbe10e348,
0x3e70fb40, 0xbd608060, 0x3ec09f96, 0xbe5da7e8, 0x3edbfc7a, 0xbe211e60,
0x3ed7af1a, 0x3ec13d5a, 0x3ea9cb78, 0xbecddb00, 0x3d3f1470, 0xbe550c2c,
0xbe8649a2, 0x3958a000, 0x3de892f0, 0xbeb1d4c4};
uint32_t hidden_biases1_shape[] = {1, 4};
uint32_t hidden_biases1_values[] = {
0xbee4169c, 0x3e9d3bf8, 0x3d560ae0, 0x3cec4ba0, 0x3efef9ec, 0xbe97bf38,
0x3eff933e, 0xbef5ae46, 0xbe8dc31c, 0xbe56c57c, 0xbe15b3d0, 0xbef96240,
0x3cd4dd20, 0x3db51a80, 0xb9ddf000, 0x3e255720};
uint32_t all_ts_out1_shape[] = {5, 1, 2, 4};
uint32_t all_ts_out1_exp_values[] = {
0x3d83a5ef, 0x3ec5ab29, 0xbe96f2a9, 0xbdf8dcd8, 0x3de2bd50, 0x3ed99a83,
0x3e415f85, 0x3cfbc484, 0xbe8c59a2, 0x3e5053ab, 0x3c0308c4, 0xbe9ffe19,
0xbe8faac0, 0x3e83c5a5, 0x3e14e22c, 0xbe1cad7e, 0xbec9f76f, 0xbd0a8fd7,
0x3e1e0707, 0xbebd7c2d, 0xbecd1305, 0x3c73fc9e, 0x3e550bbb, 0xbe8989fa,
0xbedd0f9f, 0xbe0144b8, 0x3e53529a, 0xbebd2a6c, 0xbef77903, 0xbe2d44ac,
0x3e92e511, 0xbea59a45, 0xbee650d7, 0xbe2e4cba, 0x3e64bf3b, 0xbecdd76e,
0xbefc2978, 0xbe72658c, 0x3e8fef57, 0xbeb46b98};
uint32_t cell_out1_shape[] = {1, 1, 2, 4};
uint32_t cell_out1_values[] = {0xbf638f5a, 0xbe9a2d53, 0x3edea444,
0xbf91ec0e, 0xbf8d6301, 0xbec78653,
0x3f0e8a9f, 0xbf885ceb};
zdnn_ztensor *all_ts_out1 = test_layer(
all_ts_out0, h01_shape, (void *)h01_values, c01_shape, (void *)c01_values,
weights1_shape, (void *)weights1_values, biases1_shape,
(void *)biases1_values, hidden_weights1_shape,
(void *)hidden_weights1_values, hidden_biases1_shape,
(void *)hidden_biases1_values, all_ts_out1_shape,
(void *)all_ts_out1_exp_values, cell_out1_shape, (void *)cell_out1_values,
is_layer_bidir[0], is_layer_bidir[1]);
free_ztensor_buffers(3, input0, all_ts_out0, all_ts_out1);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_fwd_to_fwd);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_fwd_to_bidir);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_bidir_to_bidir);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_bidir_to_fwd);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_lstm_rnn.c 0000664 0000000 0000000 00000103664 15000221702 0020731 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_rnn.h"
/******************************************************************************
default_input
******************************************************************************/
uint32_t default_input_shape[] = {5, 2, 4};
/* Visualization of values in shape (timestep, batch, feature) order
[
[ # timestep_0
[.000, .001, .002, .003], # batch_0
[.010, .011, .012, .013], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_1
[.100, .101, .102, .103], # batch_0
[.110, .111, .112, .113], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_2
[.200, .201, .202, .203], # batch_0
[.210, .211, .212, .213], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_3
[.300, .301, .302, .303], # batch_0
[.310, .311, .312, .313], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
[ # timestep_4
[.400, .401, .402, .403], # batch_0
[.410, .411, .412, .413], # batch_1
# feat_0 feat_1 feat_2 feat_3
],
]
*/
float default_input_values[] = {
0.0, 0.001, 0.002, 0.003, 0.01, 0.011, 0.012, 0.013, 0.1, 0.101,
0.102, 0.103, 0.11, 0.111, 0.112, 0.113, 0.2, 0.201, 0.202, 0.203,
0.21, 0.211, 0.212, 0.213, 0.3, 0.301, 0.302, 0.303, 0.31, 0.311,
0.312, 0.313, 0.4, 0.401, 0.402, 0.403, 0.41, 0.411, 0.412, 0.413};
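// Flat indexing for the {5, 2, 4} shape above: element (timestep t, batch b,
// feature f) lives at default_input_values[t * 2 * 4 + b * 4 + f]; for
// example (2, 1, 3) maps to index 23, i.e. 0.213, matching the visualization.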
/******************************************************************************
default_uni_h0_shape
******************************************************************************/
uint32_t default_uni_h0_shape[] = {1, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_uni_h0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_uni_c0_shape
******************************************************************************/
uint32_t default_uni_c0_shape[] = {1, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_uni_c0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_uni_input_weights
******************************************************************************/
uint32_t default_uni_input_weights_shape[] = {1, 4, 3};
/* Visualization of f concatenation values in shape order
[[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]]
*/
float default_uni_input_weights_f_values[] = {
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131};
/* Visualization of i concatenation values in shape order
[[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]]
*/
float default_uni_input_weights_i_values[] = {
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605};
/* Visualization of c concatenation values in shape order
[[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]]
*/
float default_uni_input_weights_c_values[] = {
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076};
/* Visualization of o concatenation values in shape order
[[[ 0.5423677 0.0945408 0.4383084]
[-0.5070595 -0.1628114 0.4629621]
[-0.0710383 -0.5199673 0.4833339]
[ 0.5621256 0.2686667 0.113032 ]]]
*/
float default_uni_input_weights_o_values[] = {
0.5423677, 0.0945408, 0.4383084, -0.5070595, -0.1628114, 0.4629621,
-0.0710383, -0.5199673, 0.4833339, 0.5621256, 0.2686667, 0.113032};
/******************************************************************************
default_uni_input_biases
******************************************************************************/
uint32_t default_uni_input_biases_shape[] = {1, 3};
/* Visualization of f concatenation values in shape order
[[-0.1775665 0.0771791 -0.2241169]]
*/
float default_uni_input_biases_f_values[] = {-0.1775665, 0.0771791, -0.2241169};
/* Visualization of i concatenation values in shape order
[[ 0.3968375 -0.4157575 -0.3188125]]
*/
float default_uni_input_biases_i_values[] = {0.3968375, -0.4157575, -0.3188125};
/* Visualization of c concatenation values in shape order
[[-0.3590846 -0.1054496 -0.2817501]]
*/
float default_uni_input_biases_c_values[] = {-0.3590846, -0.1054496,
-0.2817501};
/* Visualization of o concatenation values in shape order
[[ 0.0158953 -0.4273889 -0.1443277]]
*/
float default_uni_input_biases_o_values[] = {0.0158953, -0.4273889, -0.1443277};
/******************************************************************************
default_uni_hidden_weights
******************************************************************************/
uint32_t default_uni_hidden_weights_shape[] = {1, 3, 3};
/* Visualization of f concatenation values in shape order
[[[-0.3689663 -0.3204532 -0.1866051]
[-0.3069769 -0.3292732 -0.392639 ]
[ 0.5463605 -0.1544762 0.4665768]]]
*/
float default_uni_hidden_weights_f_values[] = {
-0.3689663, -0.3204532, -0.1866051, -0.3069769, -0.3292732,
-0.392639, 0.5463605, -0.1544762, 0.4665768};
/* Visualization of i concatenation values in shape order
[[[ 0.4114995 -0.049397 0.3073992]
[-0.1453276 -0.1190602 0.233599 ]
[ 0.4688771 -0.2869941 0.3672419]]]
*/
float default_uni_hidden_weights_i_values[] = {
0.4114995, -0.049397, 0.3073992, -0.1453276, -0.1190602,
0.233599, 0.4688771, -0.2869941, 0.3672419};
/* Visualization of c concatenation values in shape order
[[[ 0.0643551 -0.3741214 -0.0919193]
[ 0.2632221 0.4407408 0.4369227]
[ 0.4282453 -0.2892259 0.5323023]]]
*/
float default_uni_hidden_weights_c_values[] = {
0.0643551, -0.3741214, -0.0919193, 0.2632221, 0.4407408,
0.4369227, 0.4282453, -0.2892259, 0.5323023};
/* Visualization of o concatenation values in shape order
[[[ 0.5068286 -0.2080224 -0.0424343]
[ 0.3320496 -0.0367477 -0.0702022]
[ 0.5366269 -0.1974721 0.3084639]]]
*/
float default_uni_hidden_weights_o_values[] = {
0.5068286, -0.2080224, -0.0424343, 0.3320496, -0.0367477,
-0.0702022, 0.5366269, -0.1974721, 0.3084639};
/******************************************************************************
default_uni_hidden_biases
******************************************************************************/
uint32_t default_uni_hidden_biases_shape[] = {1, 3};
/* Visualization of f concatenation values in shape order
[[ 0.3785818 -0.186314 -0.5293279]]
*/
float default_uni_hidden_biases_f_values[] = {0.3785818, -0.186314, -0.5293279};
/* Visualization of i concatenation values in shape order
[[-0.2130262 -0.0797516 0.4536392]]
*/
float default_uni_hidden_biases_i_values[] = {-0.2130262, -0.0797516,
0.4536392};
/* Visualization of c concatenation values in shape order
[[-0.4129714 -0.4429338 -0.0547802]]
*/
float default_uni_hidden_biases_c_values[] = {-0.4129714, -0.4429338,
-0.0547802};
/* Visualization of o concatenation values in shape order
[[-0.2563944 -0.4034805 0.1280097]]
*/
float default_uni_hidden_biases_o_values[] = {-0.2563944, -0.4034805,
0.1280097};
/******************************************************************************
default_bidir_h0
******************************************************************************/
uint32_t default_bidir_h0_shape[] = {2, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]
[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_bidir_h0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_bidir_c0
******************************************************************************/
uint32_t default_bidir_c0_shape[] = {2, 2, 3};
/* Visualization of values in shape order
[[[0. 0. 0.]
[0. 0. 0.]]
[[0. 0. 0.]
[0. 0. 0.]]]
*/
float default_bidir_c0_values[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
/******************************************************************************
default_bidir_input_weights
******************************************************************************/
uint32_t default_bidir_input_weights_shape[] = {2, 4, 3};
/* Visualization of f concatenation values in shape order
[[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]
[[-0.4937358 0.5553266 0.1960275]
[ 0.1839888 0.1733883 -0.2754271]
[ 0.2482673 -0.5119551 -0.5303364]
[ 0.0915996 0.4851032 0.329131 ]]]
*/
float default_bidir_input_weights_f_values[] = {
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131,
-0.4937358, 0.5553266, 0.1960275, 0.1839888, 0.1733883, -0.2754271,
0.2482673, -0.5119551, -0.5303364, 0.0915996, 0.4851032, 0.329131};
/* Visualization of i concatenation values in shape order
[[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]
[[ 0.381342 0.4850937 -0.5389395]
[-0.4317299 -0.44266 0.5706354]
[ 0.4705055 -0.3875273 0.1228931]
[ 0.3694199 0.2747256 0.0745605]]]
*/
float default_bidir_input_weights_i_values[] = {
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605,
0.381342, 0.4850937, -0.5389395, -0.4317299, -0.44266, 0.5706354,
0.4705055, -0.3875273, 0.1228931, 0.3694199, 0.2747256, 0.0745605};
/* Visualization of c concatenation values in shape order
[[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]
[[ 0.548669 -0.2726471 -0.5263513]
[-0.4730297 -0.1263285 -0.0133806]
[ 0.0315526 -0.385514 0.3423259]
[ 0.2071373 -0.2729528 0.2808076]]]
*/
float default_bidir_input_weights_c_values[] = {
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076,
0.548669, -0.2726471, -0.5263513, -0.4730297, -0.1263285, -0.0133806,
0.0315526, -0.385514, 0.3423259, 0.2071373, -0.2729528, 0.2808076};
/* Visualization of o concatenation values in shape order
[[[ 0.5423677 0.0945408 0.4383084]
[-0.5070595 -0.1628114 0.4629621]
[-0.0710383 -0.5199673 0.4833339]
[ 0.5621256 0.2686667 0.113032 ]]
[[ 0.5423677 0.0945408 0.4383084]
[-0.5070595 -0.1628114 0.4629621]
[-0.0710383 -0.5199673 0.4833339]
[ 0.5621256 0.2686667 0.113032 ]]]
*/
float default_bidir_input_weights_o_values[] = {
0.5423677, 0.0945408, 0.4383084, -0.5070595, -0.1628114, 0.4629621,
-0.0710383, -0.5199673, 0.4833339, 0.5621256, 0.2686667, 0.113032,
0.5423677, 0.0945408, 0.4383084, -0.5070595, -0.1628114, 0.4629621,
-0.0710383, -0.5199673, 0.4833339, 0.5621256, 0.2686667, 0.113032};
/******************************************************************************
default_bidir_input_biases
******************************************************************************/
uint32_t default_bidir_input_biases_shape[] = {2, 3};
/* Visualization of f concatenation values in shape order
[[-0.1775665 0.0771791 -0.2241169]
[-0.1775665 0.0771791 -0.2241169]]
*/
float default_bidir_input_biases_f_values[] = {
-0.1775665, 0.0771791, -0.2241169, -0.1775665, 0.0771791, -0.2241169};
/* Visualization of i concatenation values in shape order
[[ 0.3968375 -0.4157575 -0.3188125]
[ 0.3968375 -0.4157575 -0.3188125]]
*/
float default_bidir_input_biases_i_values[] = {
0.3968375, -0.4157575, -0.3188125, 0.3968375, -0.4157575, -0.3188125};
/* Visualization of c concatenation values in shape order
[[-0.3590846 -0.1054496 -0.2817501]
[-0.3590846 -0.1054496 -0.2817501]]
*/
float default_bidir_input_biases_c_values[] = {
-0.3590846, -0.1054496, -0.2817501, -0.3590846, -0.1054496, -0.2817501};
/* Visualization of o concatenation values in shape order
[[ 0.0158953 -0.4273889 -0.1443277]
[ 0.0158953 -0.4273889 -0.1443277]]
*/
float default_bidir_input_biases_o_values[] = {
0.0158953, -0.4273889, -0.1443277, 0.0158953, -0.4273889, -0.1443277};
/******************************************************************************
default_bidir_hidden_weights
******************************************************************************/
uint32_t default_bidir_hidden_weights_shape[] = {2, 3, 3};
/* Visualization of f concatenation values in shape order
[[[-0.3689663 -0.3204532 -0.1866051]
[-0.3069769 -0.3292732 -0.392639 ]
[ 0.5463605 -0.1544762 0.4665768]]
[[-0.3689663 -0.3204532 -0.1866051]
[-0.3069769 -0.3292732 -0.392639 ]
[ 0.5463605 -0.1544762 0.4665768]]]
*/
float default_bidir_hidden_weights_f_values[] = {
-0.3689663, -0.3204532, -0.1866051, -0.3069769, -0.3292732, -0.392639,
0.5463605, -0.1544762, 0.4665768, -0.3689663, -0.3204532, -0.1866051,
-0.3069769, -0.3292732, -0.392639, 0.5463605, -0.1544762, 0.4665768};
/* Visualization of i concatenation values in shape order
[[[ 0.4114995 -0.049397 0.3073992]
[-0.1453276 -0.1190602 0.233599 ]
[ 0.4688771 -0.2869941 0.3672419]]
[[ 0.4114995 -0.049397 0.3073992]
[-0.1453276 -0.1190602 0.233599 ]
[ 0.4688771 -0.2869941 0.3672419]]]
*/
float default_bidir_hidden_weights_i_values[] = {
0.4114995, -0.049397, 0.3073992, -0.1453276, -0.1190602, 0.233599,
0.4688771, -0.2869941, 0.3672419, 0.4114995, -0.049397, 0.3073992,
-0.1453276, -0.1190602, 0.233599, 0.4688771, -0.2869941, 0.3672419};
/* Visualization of c concatenation values in shape order
[[[ 0.0643551 -0.3741214 -0.0919193]
[ 0.2632221 0.4407408 0.4369227]
[ 0.4282453 -0.2892259 0.5323023]]
[[ 0.0643551 -0.3741214 -0.0919193]
[ 0.2632221 0.4407408 0.4369227]
[ 0.4282453 -0.2892259 0.5323023]]]
*/
float default_bidir_hidden_weights_c_values[] = {
0.0643551, -0.3741214, -0.0919193, 0.2632221, 0.4407408, 0.4369227,
0.4282453, -0.2892259, 0.5323023, 0.0643551, -0.3741214, -0.0919193,
0.2632221, 0.4407408, 0.4369227, 0.4282453, -0.2892259, 0.5323023};
/* Visualization of o concatenation values in shape order
[[[ 0.5068286 -0.2080224 -0.0424343]
[ 0.3320496 -0.0367477 -0.0702022]
[ 0.5366269 -0.1974721 0.3084639]]
[[ 0.5068286 -0.2080224 -0.0424343]
[ 0.3320496 -0.0367477 -0.0702022]
[ 0.5366269 -0.1974721 0.3084639]]]
*/
float default_bidir_hidden_weights_o_values[] = {
0.5068286, -0.2080224, -0.0424343, 0.3320496, -0.0367477, -0.0702022,
0.5366269, -0.1974721, 0.3084639, 0.5068286, -0.2080224, -0.0424343,
0.3320496, -0.0367477, -0.0702022, 0.5366269, -0.1974721, 0.3084639};
/******************************************************************************
default_bidir_hidden_biases
******************************************************************************/
uint32_t default_bidir_hidden_biases_shape[] = {2, 3};
/* Visualization of f concatenation values in shape order
[[ 0.3785818 -0.186314 -0.5293279]
[ 0.3785818 -0.186314 -0.5293279]]
*/
float default_bidir_hidden_biases_f_values[] = {
0.3785818, -0.186314, -0.5293279, 0.3785818, -0.186314, -0.5293279};
/* Visualization of i concatenation values in shape order
[[-0.2130262 -0.0797516 0.4536392]
[-0.2130262 -0.0797516 0.4536392]]
*/
float default_bidir_hidden_biases_i_values[] = {
-0.2130262, -0.0797516, 0.4536392, -0.2130262, -0.0797516, 0.4536392};
/* Visualization of c concatenation values in shape order
[[-0.4129714 -0.4429338 -0.0547802]
[-0.4129714 -0.4429338 -0.0547802]]
*/
float default_bidir_hidden_biases_c_values[] = {
-0.4129714, -0.4429338, -0.0547802, -0.4129714, -0.4429338, -0.0547802};
/* Visualization of o concatenation values in shape order
[[-0.2563944 -0.4034805 0.1280097]
[-0.2563944 -0.4034805 0.1280097]]
*/
float default_bidir_hidden_biases_o_values[] = {
-0.2563944, -0.4034805, 0.1280097, -0.2563944, -0.4034805, 0.1280097};
/******************************************************************************
default_fwd_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_fwd_hn_out_all_ts_shape[] = {5, 1, 2, 3};
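// Dims read as (num_timesteps, num_directions, batch, hidden_size): 5
// timesteps, 1 forward direction, batch of 2, hidden size 3, consistent with
// default_input and the uni weights/biases above.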
/* Visualization of values in shape order
[[[-0.1496885 -0.0568049 -0.0847668]
[-0.1502335 -0.057525 -0.0853017]]
[[-0.212243 -0.0906312 -0.1264551]
[-0.2129832 -0.0917483 -0.1272719]]
[[-0.2460073 -0.1145757 -0.1504627]
[-0.2468257 -0.115835 -0.1514198]]
[[-0.2677511 -0.1334158 -0.1669724]
[-0.2686036 -0.1346632 -0.1679834]]
[[-0.2836966 -0.1488931 -0.180066 ]
[-0.2845615 -0.1500451 -0.1810745]]]
*/
float default_fwd_exp_hn_out_all_ts_values[] = {
-0.1496885, -0.0568049, -0.0847668, -0.1502335, -0.057525, -0.0853017,
-0.212243, -0.0906312, -0.1264551, -0.2129832, -0.0917483, -0.1272719,
-0.2460073, -0.1145757, -0.1504627, -0.2468257, -0.115835, -0.1514198,
-0.2677511, -0.1334158, -0.1669724, -0.2686036, -0.1346632, -0.1679834,
-0.2836966, -0.1488931, -0.180066, -0.2845615, -0.1500451, -0.1810745};
/******************************************************************************
default_fwd_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_fwd_hn_out_final_ts_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.2836966 -0.1488931 -0.180066 ]
[-0.2845615 -0.1500451 -0.1810745]]]
*/
float default_fwd_exp_hn_out_final_ts_values[] = {
-0.2836966, -0.1488931, -0.180066, -0.2845615, -0.1500451, -0.1810745};
/******************************************************************************
default_fwd_cf_exp_out
******************************************************************************/
uint32_t default_fwd_cf_out_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.8036579 -0.552912 -0.2915583]
[-0.8046424 -0.5594633 -0.2916239]]]
*/
float default_fwd_exp_cf_out_values[] = {-0.8036579, -0.552912, -0.2915583,
-0.8046424, -0.5594633, -0.2916239};
/******************************************************************************
default_bwd_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_bwd_hn_out_all_ts_shape[] = {5, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.2486852 -0.1223668 -0.1448121]
[-0.2495632 -0.1242222 -0.1459369]]
[[-0.2501265 -0.1314582 -0.1518588]
[-0.2509633 -0.1329102 -0.1529005]]
[[-0.2448045 -0.1305399 -0.1532898]
[-0.2455692 -0.1315801 -0.1541975]]
[[-0.2248478 -0.1148318 -0.1424497]
[-0.2254719 -0.1154587 -0.14315 ]]
[[-0.1676665 -0.0753414 -0.1037449]
[-0.1679938 -0.0755724 -0.1041366]]]
*/
float default_bwd_exp_hn_out_all_ts_values[] = {
-0.2486852, -0.1223668, -0.1448121, -0.2495632, -0.1242222, -0.1459369,
-0.2501265, -0.1314582, -0.1518588, -0.2509633, -0.1329102, -0.1529005,
-0.2448045, -0.1305399, -0.1532898, -0.2455692, -0.1315801, -0.1541975,
-0.2248478, -0.1148318, -0.1424497, -0.2254719, -0.1154587, -0.14315,
-0.1676665, -0.0753414, -0.1037449, -0.1679938, -0.0755724, -0.1041366};
/******************************************************************************
default_bwd_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_bwd_hn_out_final_ts_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.2486852 -0.1223668 -0.1448121]
[-0.2495632 -0.1242222 -0.1459369]]]
*/
float default_bwd_exp_hn_out_final_ts_values[] = {
-0.2486852, -0.1223668, -0.1448121, -0.2495632, -0.1242222, -0.1459369};
/******************************************************************************
default_bwd_exp_cf_out
******************************************************************************/
uint32_t default_bwd_cf_out_shape[] = {1, 1, 2, 3};
/* Visualization of values in shape order
[[[-0.7843156 -0.4000301 -0.3048753]
[-0.7856599 -0.4076315 -0.3049449]]]
*/
float default_bwd_exp_cf_out_values[] = {-0.7843156, -0.4000301, -0.3048753,
-0.7856599, -0.4076315, -0.3049449};
/******************************************************************************
default_bidir_exp_hn_out_all_ts
******************************************************************************/
uint32_t default_bidir_hn_out_all_ts_shape[] = {5, 2, 2, 3};
/* Visualization of values in shape order
[[[-0.1496885 -0.0568049 -0.0847668 -0.1502335 -0.057525 -0.0853017]
[-0.2486852 -0.1223668 -0.1448121 -0.2495632 -0.1242222 -0.1459369]]
[[-0.212243 -0.0906312 -0.1264551 -0.2129832 -0.0917483 -0.1272719]
[-0.2501265 -0.1314583 -0.1518588 -0.2509633 -0.1329102 -0.1529005]]
[[-0.2460073 -0.1145757 -0.1504627 -0.2468257 -0.115835 -0.1514198]
[-0.2448045 -0.1305399 -0.1532898 -0.2455692 -0.1315801 -0.1541975]]
[[-0.2677511 -0.1334158 -0.1669723 -0.2686036 -0.1346633 -0.1679834]
[-0.2248478 -0.1148318 -0.1424497 -0.2254719 -0.1154587 -0.14315 ]]
[[-0.2836966 -0.1488931 -0.180066 -0.2845615 -0.1500451 -0.1810745]
[-0.1676665 -0.0753414 -0.1037448 -0.1679938 -0.0755724 -0.1041366]]]
*/
float default_bidir_exp_hn_out_all_ts_values[] = {
-0.1496885, -0.0568049, -0.0847668, -0.1502335, -0.057525, -0.0853017,
-0.2486852, -0.1223668, -0.1448121, -0.2495632, -0.1242222, -0.1459369,
-0.212243, -0.0906312, -0.1264551, -0.2129832, -0.0917483, -0.1272719,
-0.2501265, -0.1314583, -0.1518588, -0.2509633, -0.1329102, -0.1529005,
-0.2460073, -0.1145757, -0.1504627, -0.2468257, -0.115835, -0.1514198,
-0.2448045, -0.1305399, -0.1532898, -0.2455692, -0.1315801, -0.1541975,
-0.2677511, -0.1334158, -0.1669723, -0.2686036, -0.1346633, -0.1679834,
-0.2248478, -0.1148318, -0.1424497, -0.2254719, -0.1154587, -0.14315,
-0.2836966, -0.1488931, -0.180066, -0.2845615, -0.1500451, -0.1810745,
-0.1676665, -0.0753414, -0.1037448, -0.1679938, -0.0755724, -0.1041366};
/******************************************************************************
default_bidir_exp_hn_out_final_ts
******************************************************************************/
uint32_t default_bidir_hn_out_final_ts_shape[] = {1, 2, 2, 3};
/* Visualization of values in shape order
[[[-0.2836966 -0.1488931 -0.180066 -0.2845615 -0.1500451 -0.1810745]
[-0.2486852 -0.1223668 -0.1448121 -0.2495632 -0.1242222 -0.1459369]]]
*/
float default_bidir_exp_hn_out_final_ts_values[] = {
-0.2836966, -0.1488931, -0.180066, -0.2845615, -0.1500451, -0.1810745,
-0.2486852, -0.1223668, -0.1448121, -0.2495632, -0.1242222, -0.1459369};
/******************************************************************************
default_bidir_cf_exp_out
******************************************************************************/
uint32_t default_bidir_cf_out_shape[] = {1, 2, 2, 3};
/* Visualization of values in shape order
[[[-0.8036579 -0.552912 -0.2915582 -0.8046424 -0.5594633 -0.2916239]
[-0.7843156 -0.4000301 -0.3048753 -0.7856599 -0.4076315 -0.3049449]]]
*/
float default_bidir_exp_cf_out_values[] = {
-0.8036579, -0.552912, -0.2915582, -0.8046424, -0.5594633, -0.2916239,
-0.7843156, -0.4000301, -0.3048753, -0.7856599, -0.4076315, -0.3049449};
/******************************************************************************
Unity Methods
******************************************************************************/
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/******************************************************************************
Tests
******************************************************************************/
// Confirm that lstm returns OK and expected values when set to return hn
// results from all timesteps
void lstm_basic_fwd_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
default_uni_c0_shape, ZDNN_3DS, default_uni_c0_values,
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_f_values, default_uni_input_weights_i_values,
default_uni_input_weights_c_values, default_uni_input_weights_o_values,
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_f_values, default_uni_input_biases_i_values,
default_uni_input_biases_c_values, default_uni_input_biases_o_values,
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_f_values, default_uni_hidden_weights_i_values,
default_uni_hidden_weights_c_values, default_uni_hidden_weights_o_values,
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_f_values, default_uni_hidden_biases_i_values,
default_uni_hidden_biases_c_values, default_uni_hidden_biases_o_values,
default_fwd_hn_out_all_ts_shape, ZDNN_4DS,
default_fwd_exp_hn_out_all_ts_values,
default_fwd_cf_out_shape, ZDNN_4DS, default_fwd_exp_cf_out_values,
FWD, ZDNN_OK);
}
// Confirm that lstm returns OK and expected values when set to return only the
// final hn result
void lstm_basic_fwd_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
default_uni_c0_shape, ZDNN_3DS, default_uni_c0_values,
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_f_values, default_uni_input_weights_i_values,
default_uni_input_weights_c_values, default_uni_input_weights_o_values,
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_f_values, default_uni_input_biases_i_values,
default_uni_input_biases_c_values, default_uni_input_biases_o_values,
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_f_values, default_uni_hidden_weights_i_values,
default_uni_hidden_weights_c_values, default_uni_hidden_weights_o_values,
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_f_values, default_uni_hidden_biases_i_values,
default_uni_hidden_biases_c_values, default_uni_hidden_biases_o_values,
default_fwd_hn_out_final_ts_shape, ZDNN_4DS,
default_fwd_exp_hn_out_final_ts_values,
default_fwd_cf_out_shape, ZDNN_4DS, default_fwd_exp_cf_out_values,
FWD, ZDNN_OK);
}
// Confirm that lstm returns OK and expected values when set to return hn
// results from all timesteps
void lstm_basic_bwd_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
default_uni_c0_shape, ZDNN_3DS, default_uni_c0_values,
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_f_values, default_uni_input_weights_i_values,
default_uni_input_weights_c_values, default_uni_input_weights_o_values,
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_f_values, default_uni_input_biases_i_values,
default_uni_input_biases_c_values, default_uni_input_biases_o_values,
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_f_values, default_uni_hidden_weights_i_values,
default_uni_hidden_weights_c_values, default_uni_hidden_weights_o_values,
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_f_values, default_uni_hidden_biases_i_values,
default_uni_hidden_biases_c_values, default_uni_hidden_biases_o_values,
default_bwd_hn_out_all_ts_shape, ZDNN_4DS,
default_bwd_exp_hn_out_all_ts_values,
default_bwd_cf_out_shape, ZDNN_4DS, default_bwd_exp_cf_out_values,
BWD, ZDNN_OK);
}
// Confirm that lstm returns OK and expected values when set to return only the
// final hn result
void lstm_basic_bwd_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_uni_h0_shape, ZDNN_3DS, default_uni_h0_values,
default_uni_c0_shape, ZDNN_3DS, default_uni_c0_values,
default_uni_input_weights_shape, ZDNN_3DS,
default_uni_input_weights_f_values, default_uni_input_weights_i_values,
default_uni_input_weights_c_values, default_uni_input_weights_o_values,
default_uni_input_biases_shape, ZDNN_2DS,
default_uni_input_biases_f_values, default_uni_input_biases_i_values,
default_uni_input_biases_c_values, default_uni_input_biases_o_values,
default_uni_hidden_weights_shape, ZDNN_3DS,
default_uni_hidden_weights_f_values, default_uni_hidden_weights_i_values,
default_uni_hidden_weights_c_values, default_uni_hidden_weights_o_values,
default_uni_hidden_biases_shape, ZDNN_2DS,
default_uni_hidden_biases_f_values, default_uni_hidden_biases_i_values,
default_uni_hidden_biases_c_values, default_uni_hidden_biases_o_values,
default_bwd_hn_out_final_ts_shape, ZDNN_4DS,
default_bwd_exp_hn_out_final_ts_values,
default_bwd_cf_out_shape, ZDNN_4DS, default_bwd_exp_cf_out_values,
BWD, ZDNN_OK);
}
// Confirm that lstm returns OK and expected values when set to return hn
// results from all timesteps
void lstm_basic_bidir_hn_all() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
default_bidir_c0_shape, ZDNN_3DS, default_bidir_c0_values,
default_bidir_input_weights_shape, ZDNN_3DS,
default_bidir_input_weights_f_values,
default_bidir_input_weights_i_values,
default_bidir_input_weights_c_values,
default_bidir_input_weights_o_values,
default_bidir_input_biases_shape, ZDNN_2DS,
default_bidir_input_biases_f_values, default_bidir_input_biases_i_values,
default_bidir_input_biases_c_values, default_bidir_input_biases_o_values,
default_bidir_hidden_weights_shape, ZDNN_3DS,
default_bidir_hidden_weights_f_values,
default_bidir_hidden_weights_i_values,
default_bidir_hidden_weights_c_values,
default_bidir_hidden_weights_o_values,
default_bidir_hidden_biases_shape, ZDNN_2DS,
default_bidir_hidden_biases_f_values,
default_bidir_hidden_biases_i_values,
default_bidir_hidden_biases_c_values,
default_bidir_hidden_biases_o_values,
default_bidir_hn_out_all_ts_shape, ZDNN_4DS,
default_bidir_exp_hn_out_all_ts_values,
default_bidir_cf_out_shape, ZDNN_4DS, default_bidir_exp_cf_out_values,
BIDIR, ZDNN_OK);
}
// Confirm that lstm returns OK and expected values when set to return only the
// final hn result
void lstm_basic_bidir_hn_final() {
test_zdnn_api_lstm_gru(
NNPA_LSTMACT,
default_input_shape, ZDNN_3DS, default_input_values,
default_bidir_h0_shape, ZDNN_3DS, default_bidir_h0_values,
default_bidir_c0_shape, ZDNN_3DS, default_bidir_c0_values,
default_bidir_input_weights_shape, ZDNN_3DS,
default_bidir_input_weights_f_values,
default_bidir_input_weights_i_values,
default_bidir_input_weights_c_values,
default_bidir_input_weights_o_values,
default_bidir_input_biases_shape, ZDNN_2DS,
default_bidir_input_biases_f_values, default_bidir_input_biases_i_values,
default_bidir_input_biases_c_values, default_bidir_input_biases_o_values,
default_bidir_hidden_weights_shape, ZDNN_3DS,
default_bidir_hidden_weights_f_values,
default_bidir_hidden_weights_i_values,
default_bidir_hidden_weights_c_values,
default_bidir_hidden_weights_o_values,
default_bidir_hidden_biases_shape, ZDNN_2DS,
default_bidir_hidden_biases_f_values,
default_bidir_hidden_biases_i_values,
default_bidir_hidden_biases_c_values,
default_bidir_hidden_biases_o_values,
default_bidir_hn_out_final_ts_shape, ZDNN_4DS,
default_bidir_exp_hn_out_final_ts_values,
default_bidir_cf_out_shape, ZDNN_4DS, default_bidir_exp_cf_out_values,
BIDIR, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
// FWD direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_fwd_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_fwd_hn_final);
// BWD direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_bwd_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_bwd_hn_final);
// BIDIR direction tests
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_bidir_hn_all);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(lstm_basic_bidir_hn_final);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_matmul_bcast_op.c 0000664 0000000 0000000 00000047243 15000221702 0022246 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
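// Note on the tolerances above (a reading of this setup, not of the library
// docs): an epsilon_mult of (0.1 / EPSILON_x) + 1 amounts to allowing roughly
// a 0.1 difference per pre-transformed data type, while ulps sets a separate
// units-in-the-last-place allowance; see testsupport for how the comparator
// applies them.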
void tearDown(void){}
/**
 * Helper macro that, given the indices and sizes of a multidimensional array,
 * returns the equivalent index into a flat representation of the same array.
 * The result is cast to uint64_t as that's the largest number of total
 * elements a ztensor supports, as opposed to the single-dimension maximum of
 * uint32_t.
 *
 * Note: Default usage is for 3D arrays. For 2D arrays, use 0 for the
 * undefined dimension's index and 1 for its size.
*/
#define GET_FLAT_IDX(stack, row, col, row_size, col_size) \
(uint64_t)(stack) * (row_size) * (col_size) + (row) * (col_size) + (col)
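// Example (a sketch): element (stack 2, row 1, col 0) of a 3 x 4 x 3 array
// flattened into arr[] is
//   arr[GET_FLAT_IDX(2, 1, 0, 4, 3)]   /* index 2*4*3 + 1*3 + 0 = 27 */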
/**
* Helper function to print matmul arrays. 3D arrays are printed as separate
* stacks of 2D arrays.
*/
void print_matmul_array(uint32_t s, uint32_t r, uint32_t c, char *name,
float *arr) {
printf("Printing \"%s\" as %u stack(s) of array[%u][%u]\n", name, s, r, c);
for (uint32_t i = 0; i < s; i++) {
printf("\"%s\" stack %u\n", name, i);
for (uint32_t j = 0; j < r; j++) {
for (uint32_t k = 0; k < c; k++) {
printf("%f ", arr[GET_FLAT_IDX(i, j, k, r, c)]);
}
printf("\n");
}
}
printf("end \"%s\"\n\n", name);
}
/**
* Helper function to compute expected output tensor from randomly generated
* test input arrays.
*
* | first | second | bias | result |
* | (s, m, n) | (s, n, p) | (s, p) | (s, m, p) |
*
*/
void gen_test_expected_fp32_array(uint32_t s, uint32_t m, uint32_t n,
uint32_t p, zdnn_data_types type,
float *first, float *second, float *bias,
float *result) {
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < p; k++) { // COL of Mat 2
uint64_t result_idx = GET_FLAT_IDX(i, j, k, m, p);
uint64_t bias_idx = GET_FLAT_IDX(i, 0, k, 1, p);
float cleansed_bias = 0;
switch (type) {
case (BFLOAT):
cleansed_bias = CLEANSE_BFLOAT(bias[bias_idx]);
break;
case (FP16):
cleansed_bias = CLEANSE_FP16(bias[bias_idx]);
break;
case (FP32):
cleansed_bias = CLEANSE_FP32(bias[bias_idx]);
break;
default:
break;
}
result[result_idx] = cleansed_bias; // bias add
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("result[%u][%u][%u] = ", i, j, k);
}
for (uint32_t l = 0; l < n; l++) { // COL of Mat 1
uint64_t first_idx = GET_FLAT_IDX(i, j, l, m, n);
uint64_t second_idx = GET_FLAT_IDX(i, l, k, n, p);
float cleansed_first = 0;
float cleansed_second = 0;
switch (type) {
case (BFLOAT):
cleansed_first = CLEANSE_BFLOAT(first[first_idx]);
cleansed_second = CLEANSE_BFLOAT(second[second_idx]);
break;
case (FP16):
cleansed_first = CLEANSE_FP16(first[first_idx]);
cleansed_second = CLEANSE_FP16(second[second_idx]);
break;
case (FP32):
cleansed_first = CLEANSE_FP32(first[first_idx]);
cleansed_second = CLEANSE_FP32(second[second_idx]);
break;
default:
break;
}
result[result_idx] += cnvt_1_dlf16_to_fp32(cnvt_1_fp32_to_dlf16(
cleansed_first * cleansed_second)); // dot product
// Prints the math that generates each cell of the output.
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("(%f * %f) + ", cleansed_first, cleansed_second);
}
}
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("%f = %f\n", cleansed_bias, result[result_idx]);
}
}
}
}
}
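// In effect, for each stack i the loops above compute
//   result[i][j][k] = cleanse(bias[i][k])
//                     + sum_l dlf16(cleanse(first[i][j][l]) * cleanse(second[i][l][k]))
// where cleanse() round-trips through the pre-transformed type and dlf16()
// through DLFLOAT16, so the expected values carry the same precision loss as
// the NNPA computation.
//
// Minimal usage sketch (hypothetical values, FP32 pre-transformed type):
//   float a[] = {1, 2}, b[] = {3, 4}, bias[] = {0.5}, out[1];
//   gen_test_expected_fp32_array(1, 1, 2, 1, FP32, a, b, bias, out);
//   // out[0] ~= 0.5 + 1*3 + 2*4 = 11.5 (modulo DLFLOAT16 rounding)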
/**
* zdnn_matmul_bcast23_op_test
*
* Handles all the logic to run custom tests.
*
 * Shapes are interpreted as:
 * - input_a = s x m x n ZDNN_3DS
 * - input_b = n x p ZDNN_2D (broadcast across the s stacks)
 * - bias = p ZDNN_1D (broadcast across the s stacks)
 * - output = s x m x p ZDNN_3DS
*
*/
void zdnn_matmul_bcast23_op_test(
uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape, uint32_t *output_shape, float *input_a,
float *input_b, float *bias, zdnn_matmul_bcast_ops op_type,
zdnn_status expected_status, float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, ZDNN_3DS, test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, ZDNN_2D, test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, ZDNN_1D, test_datatype, NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, ZDNN_3DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
test_status =
zdnn_matmul_bcast_op(input_a_ztensor, input_b_ztensor, input_bias_ztensor,
op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_bcast_op() with %d Op but %08x "
"was returned.",
expected_status, op_type, test_status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_b_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(1, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
  default:
    // should never get here
    break;
  }
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
/**
* - MatMul Broadcast 23 Compare
*
* - Matrix input_a = 3x4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x4x3_by_3x2(zdnn_matmul_bcast_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  // manually "broadcast" those 3*2 entries 3 times across input_b_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
  // manually "broadcast" those 2 entries 3 times across input_c_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
zdnn_matmul_bcast23_op_test(input_a_shape, input_b_shape, input_c_shape,
output_shape, input_a_values, input_b_values,
input_c_values, op, ZDNN_OK, exp_vals);
}
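/*
 * Worked example for the expected values below (plain float arithmetic):
 * stack 0, row 2 of input_a is {7, 8, 9}; against the input_b columns
 * {1, 3, 5} and {2, 4, 6} the dot products are 76 and 100. Compared with the
 * bias {50, 100}: 76 > 50 gives 1 and 100 > 100 gives 0 for GREATER, while
 * EQUAL flags only the 100 == 100 cell. The same pattern repeats for each of
 * the 3 stacks, which is why every expected array below is three copies of an
 * eight-value block.
 */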
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_GREATER_EQUAL,
is_greater_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_LESSER_EQUAL,
is_lesser_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_BCAST_OP_LESSER, is_lesser_exp_vals);
}
/**
* zdnn_matmul_bcast1_op_test
*
* Handles all the logic to run custom tests.
*
 * Shapes are interpreted as:
* - input_a = m x n ZDNN_2D
* - input_b = s x n x p ZDNN_3DS
* - bias = s x p ZDNN_2DS
* - output = s x m x p ZDNN_3DS
*
*/
void zdnn_matmul_bcast1_op_test(
uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape, uint32_t *output_shape, float *input_a,
float *input_b, float *bias, zdnn_matmul_bcast_ops op_type,
zdnn_status expected_status, float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, ZDNN_2D, test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, ZDNN_3DS, test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, ZDNN_2DS, test_datatype, NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, ZDNN_3DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
test_status =
zdnn_matmul_bcast_op(input_a_ztensor, input_b_ztensor, input_bias_ztensor,
op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_bcast_op() with %d Op but %08x "
"was returned.",
expected_status, op_type, test_status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_b_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(1, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
  default:
    // should never get here
    break;
  }
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
/**
* - MatMul Broadcast 1 BiasAdd
*
* - Matrix input_a = 1 x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_mn_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {m, n};
num_values = m * n;
float input_a_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = m * n * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, MATMUL_BCAST_OP_ADDITION,
ZDNN_OK, expected_values);
}
void zdnn_matmul_bcast_bias_add_10x11_by_3x11x2() {
zdnn_matmul_bcast_op_mn_by_snp(3, 10, 11, 2);
}
/**
* - MatMul Broadcast 1 Compare
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x3x2 -- Manually Coded Input
* - Matrix bias = 3x2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_4x3_by_3x3x2(zdnn_matmul_bcast_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
// manually "broadcast" those 4*3 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {3, 2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_c_shape,
output_shape, input_a_values, input_b_values,
input_c_values, op, ZDNN_OK, exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_GREATER_EQUAL,
is_greater_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_LESSER_EQUAL,
is_lesser_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_BCAST_OP_LESSER, is_lesser_exp_vals);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_bias_add_10x11_by_3x11x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_matmul_op.c 0000664 0000000 0000000 00000062500 15000221702 0021063 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) { /* This is run before EACH TEST */
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
}
void tearDown(void){}
/**
 * Helper macro that, given the indices and sizes of a multidimensional array,
 * returns the equivalent index into a flat representation of the same array.
 * The result is cast to uint64_t as that's the largest number of total
 * elements a ztensor supports, as opposed to the single-dimension maximum of
 * uint32_t.
 *
 * Note: Default usage is for 3D arrays. For 2D arrays, use 0 for the
 * undefined dimension's index and 1 for its size.
*/
#define GET_FLAT_IDX(stack, row, col, row_size, col_size) \
(uint64_t)(stack) * (row_size) * (col_size) + (row) * (col_size) + (col)
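// A minimal usage sketch (hypothetical values, not referenced by any test):
//   float a[4 * 3];                      // one 4x3 matrix, row-major
//   a[GET_FLAT_IDX(0, 2, 1, 4, 3)] = 7;  // a[0*4*3 + 2*3 + 1], i.e. a[7]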
/**
* Helper function to print matmul arrays. 3D arrays are printed as separate
* stacks of 2D arrays.
*/
void print_matmul_array(uint32_t s, uint32_t r, uint32_t c, char *name,
float *arr) {
printf("Printing \"%s\" as %u stack(s) of array[%u][%u]\n", name, s, r, c);
for (uint32_t i = 0; i < s; i++) {
printf("\"%s\" stack %u\n", name, i);
for (uint32_t j = 0; j < r; j++) {
for (uint32_t k = 0; k < c; k++) {
printf("%f ", arr[GET_FLAT_IDX(i, j, k, r, c)]);
}
printf("\n");
}
}
printf("end \"%s\"\n\n", name);
}
/**
* Helper function to compute expected output tensor from randomly generated
* test input arrays.
*
* | first | second | bias | result |
* | (s, m, n) | (s, n, p) | (s, p) | (s, m, p) |
*
*/
void gen_test_expected_fp32_array(uint32_t s, uint32_t m, uint32_t n,
uint32_t p, zdnn_data_types type,
float *first, float *second, float *bias,
float *result) {
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < p; k++) { // COL of Mat 2
uint64_t result_idx = GET_FLAT_IDX(i, j, k, m, p);
uint64_t bias_idx = GET_FLAT_IDX(i, 0, k, 1, p);
float cleansed_bias = 0;
switch (type) {
case (BFLOAT):
cleansed_bias = CLEANSE_BFLOAT(bias[bias_idx]);
break;
case (FP16):
cleansed_bias = CLEANSE_FP16(bias[bias_idx]);
break;
case (FP32):
cleansed_bias = CLEANSE_FP32(bias[bias_idx]);
break;
default:
break;
}
result[result_idx] = cleansed_bias; // bias add
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("result[%u][%u][%u] = ", i, j, k);
}
for (uint32_t l = 0; l < n; l++) { // COL of Mat 1
uint64_t first_idx = GET_FLAT_IDX(i, j, l, m, n);
uint64_t second_idx = GET_FLAT_IDX(i, l, k, n, p);
float cleansed_first = 0;
float cleansed_second = 0;
switch (type) {
case (BFLOAT):
cleansed_first = CLEANSE_BFLOAT(first[first_idx]);
cleansed_second = CLEANSE_BFLOAT(second[second_idx]);
break;
case (FP16):
cleansed_first = CLEANSE_FP16(first[first_idx]);
cleansed_second = CLEANSE_FP16(second[second_idx]);
break;
case (FP32):
cleansed_first = CLEANSE_FP32(first[first_idx]);
cleansed_second = CLEANSE_FP32(second[second_idx]);
break;
default:
break;
}
result[result_idx] += cnvt_1_dlf16_to_fp32(cnvt_1_fp32_to_dlf16(
cleansed_first * cleansed_second)); // dot product
// Prints the math that generates each cell of the output.
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("(%f * %f) + ", cleansed_first, cleansed_second);
}
}
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("%f = %f\n", cleansed_bias, result[result_idx]);
}
}
}
}
}
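// A worked sketch of what the helper above produces, using hypothetical
// values: with s=1, m=1, n=2, p=1, first={2, 3}, second={4, 5}, bias={10},
// the single output cell is bias + 2*4 + 3*5 = 10 + 8 + 15 = 33 -- except that
// each operand is first "cleansed" to the test datatype and every product is
// rounded through DLFLOAT16, so the expected value matches what the hardware
// path can actually represent.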
/**
* do_test
*
* Handles all the logic to run custom tests.
*
* when is_stacked is true, shapes are interpreted as:
* - input_a = s x m x n ZDNN_3DS
* - input_b = s x n x p ZDNN_3DS
* - bias = s x p ZDNN_2DS
* - output = s x m x p ZDNN_3DS
*
* when is_stacked is not true, shapes are interpreted as:
* - input_a = m x n ZDNN_2D
* - input_b = n x p ZDNN_2D
* - bias = p ZDNN_1D
* - output = m x p ZDNN_2D
*
 * when is_bcast is true (regardless of is_stacked), shapes are interpreted
* as:
* - input_a = s x m x n ZDNN_3DS
* - input_b = n x p ZDNN_2D
* - bias = p ZDNN_1D
* - output = s x m x p ZDNN_3DS
*
*/
void do_test(uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape, uint32_t *output_shape,
bool is_stacked, bool is_bcast, float *input_a, float *input_b,
float *bias, zdnn_matmul_ops op_type, zdnn_status expected_status,
float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, (!is_stacked && !is_bcast) ? ZDNN_2D : ZDNN_3DS,
test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, (is_stacked && !is_bcast) ? ZDNN_3DS : ZDNN_2D,
test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, (is_stacked && !is_bcast) ? ZDNN_2DS : ZDNN_1D,
test_datatype, NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, (!is_stacked && !is_bcast) ? ZDNN_2D : ZDNN_3DS,
test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
if (!is_bcast) {
test_status = zdnn_matmul_op(input_a_ztensor, input_b_ztensor,
input_bias_ztensor, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_op() with %d Op but %08x was "
"returned.",
expected_status, op_type, test_status);
} else {
test_status =
zdnn_matmul_bcast_op(input_a_ztensor, input_b_ztensor,
input_bias_ztensor, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_bcast_op() with %d Op but %08x "
"was returned.",
expected_status, op_type, test_status);
}
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_a_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(s, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
break;
// should never get here
}
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
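// In brief, do_test() stickifies the four raw float arrays into ztensors with
// the layouts described above, issues the matching zdnn_matmul_op() or
// zdnn_matmul_bcast_op() call, asserts the returned status against
// expected_status, and only when ZDNN_OK is expected compares the output
// against expected_values[] using the tolerance chosen for the output's
// pre-transformed type.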
void zdnn_matmul_op_test(uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape, uint32_t *output_shape,
bool is_stacked, float *input_a, float *input_b,
float *bias, zdnn_matmul_ops op_type,
zdnn_status expected_status, float *expected_values) {
do_test(input_a_shape, input_b_shape, input_bias_shape, output_shape,
is_stacked, false, input_a, input_b, bias, op_type, expected_status,
expected_values);
}
void zdnn_matmul_bcast_op_test(uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape,
uint32_t *output_shape, float *input_a,
float *input_b, float *bias,
zdnn_matmul_bcast_ops op_type,
zdnn_status expected_status,
float *expected_values) {
do_test(input_a_shape, input_b_shape, input_bias_shape, output_shape, false,
true, input_a, input_b, bias, op_type, expected_status,
expected_values);
}
/**
* - MatMulBiasAdd (non-stacked)
*
* - Matrix input_a = 3x3 -- Manually Coded Input
* - Matrix input_b = 3x3 -- Manually Coded Input
* - Matrix bias = 3 -- Manually Coded Input
* - Matrix output = 3x3
*/
void zdnn_matmul_biasadd_3x3_by_3x3() {
// Setup Input A
uint32_t input_a_shape[] = {3, 3};
float input_a_values[] = {0.10, 0.20, 0.30, 0.40, 0.50,
0.60, 0.70, 0.80, 0.90};
// Setup Input B
uint32_t input_b_shape[] = {3, 3};
float input_b_values[] = {10, 20, 30, 40, 50, 60, 70, 80, 90};
// Setup Input bias
uint32_t input_bias_shape[] = {3};
float input_bias_values[] = {10, 10, 10};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
float expected_values[] = {40, 46, 52, 76, 91, 106, 112, 136, 160};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, MATMUL_OP_ADDITION, ZDNN_OK,
expected_values);
}
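// Hand check of the expected values above: output[0][0] is A row 0 dot
// B column 0 plus bias[0], i.e. 0.1*10 + 0.2*40 + 0.3*70 + 10 = 1 + 8 + 21 +
// 10 = 40, the first entry of expected_values[].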
/**
* - MatMulBiasAdd (non-stacked, bigger values)
*
* - Matrix input_a = 3x3 -- Manually Coded Input
* - Matrix input_b = 3x3 -- Manually Coded Input
* - Matrix bias = 3 -- Manually Coded Input
* - Matrix output = 3x3
*/
void zdnn_matmul_biasadd_3x3_by_3x3_bigger_vals() {
// Setup Input A
uint32_t input_a_shape[] = {3, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
// Setup Input B
uint32_t input_b_shape[] = {3, 3};
float input_b_values[] = {10, 20, 30, 40, 50, 60, 70, 80, 90};
// Setup Input bias
uint32_t input_bias_shape[] = {3};
float input_bias_values[] = {10, 10, 10};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
float expected_values[] = {310, 370, 430, 670, 820, 970, 1030, 1270, 1510};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, MATMUL_OP_ADDITION, ZDNN_OK,
expected_values);
}
/**
* - MatMulBiasAdd (non-stacked)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void zdnn_matmul_biasadd_4x3_by_3x2() {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_bias_shape[] = {2};
float input_bias_values[] = {3, 3};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
float expected_values[] = {25, 31, 52, 67, 79, 103, 106, 139};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, MATMUL_OP_ADDITION, ZDNN_OK,
expected_values);
}
/**
* - MatMulBiasAdd (stacked)
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_biasadd_smn_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, true, input_a_values, input_b_values,
input_bias_values, MATMUL_OP_ADDITION, ZDNN_OK,
expected_values);
}
/**
* - MatMulCompare (non-stacked)
*
* - Matrix input_a = 3x3 -- Manually Coded Input
* - Matrix input_b = 3x3 -- Manually Coded Input
* - Matrix bias = 3 -- Manually Coded Input
* - Matrix output = 3x3
*/
void test_compare_3x3_by_3x3(zdnn_matmul_ops op, float *exp_vals) {
//
  // input values are derived from power-of-2 numbers to minimize
  // precision loss due to conversion, as that affects comparisons
// Setup Input A
uint32_t input_a_shape[] = {3, 3};
float input_a_values[] = {
1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32,
1.0 / 64, 1.0 / 32, 1.0 / 16, 1.0 / 8,
};
// Setup Input B
uint32_t input_b_shape[] = {3, 3};
float input_b_values[] = {2, 4, 8, 16, 2, 4, 8, 16, 2};
// Setup Input bias
uint32_t input_c_shape[] = {3};
float input_c_values[] = {0.65, 4.5, 0.7};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values, op,
ZDNN_OK, exp_vals);
}
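// How the compare ops read (worked from the values above): the matmul result
// is compared element-wise against the bias, and each output cell is 1 when
// the comparison holds, 0 otherwise.  Cell [0][0] of the product is
// 0.5*2 + 0.25*16 + 0.125*8 = 6 > 0.65, so MATMUL_OP_GREATER expects 1 there;
// cell [0][1] is 0.5*4 + 0.25*2 + 0.125*16 = 4.5, which lands exactly on
// bias[1] = 4.5, so GREATER expects 0 while GREATER_EQUAL and EQUAL expect 1.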
void zdnn_matmul_compare_3x3_by_3x3_greater() {
float is_greater_exp_vals[] = {1., 0., 1., 1., 0., 0., 1., 0., 1.};
test_compare_3x3_by_3x3(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_greater_equal() {
float is_greater_equal_exp_vals[] = {1., 1., 1., 1., 0., 0., 1., 0., 1.};
test_compare_3x3_by_3x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_equal() {
float is_equal_exp_vals[] = {0., 1., 0., 0., 0., 0., 0., 0., 0.};
test_compare_3x3_by_3x3(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_not_equal() {
float is_not_equal_exp_vals[] = {1., 0., 1., 1., 1., 1., 1., 1., 1.};
test_compare_3x3_by_3x3(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_lesser_equal() {
float is_lesser_equal_exp_vals[] = {0., 1., 0., 0., 1., 1., 0., 1., 0.};
test_compare_3x3_by_3x3(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_lesser() {
float is_lesser_exp_vals[] = {0., 0., 0., 0., 1., 1., 0., 1., 0.};
test_compare_3x3_by_3x3(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
/**
* - MatMulCompare (non-stacked, bigger values)
*
* - Matrix input_a = 3x3 -- Manually Coded Input
* - Matrix input_b = 3x3 -- Manually Coded Input
* - Matrix bias = 3 -- Manually Coded Input
* - Matrix output = 3x3
*/
void test_compare_3x3_by_3x3_bigger_vals(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
// Setup Input B
uint32_t input_b_shape[] = {3, 3};
float input_b_values[] = {10, 20, 30, 40, 50, 60, 70, 80, 90};
// Setup Input bias
uint32_t input_c_shape[] = {3};
float input_c_values[] = {650, 360, 1000};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values, op,
ZDNN_OK, exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 1, 1, 0, 1, 1, 1};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 1, 0, 1, 1, 0, 1, 1, 1};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_GREATER_EQUAL,
is_greater_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_equal() {
float is_equal_exp_vals[] = {0, 1, 0, 0, 0, 0, 0, 0, 0};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_not_equal() {
float is_not_equal_exp_vals[] = {1, 0, 1, 1, 1, 1, 1, 1, 1};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_NOT_EQUAL,
is_not_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 0, 0, 1, 0, 0, 0};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_LESSER_EQUAL,
is_lesser_equal_exp_vals);
}
void zdnn_matmul_compare_3x3_by_3x3_bigger_vals_lesser() {
float is_lesser_exp_vals[] = {1, 0, 1, 0, 0, 1, 0, 0, 0};
test_compare_3x3_by_3x3_bigger_vals(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
/**
* - MatMulCompare (non-stacked)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void test_compare_4x3_by_3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {input_a_shape[0], input_b_shape[1]};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values, op,
ZDNN_OK, exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
/**
 * - MatMulBiasAdd Broadcast
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = 1 x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = 1 x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_smn_by_np(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {n, p};
num_values = n * p;
float input_b_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// manually "broadcast" those n*p entries s times across input_b_values[]
  // because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = n * p * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
// Setup Input bias using random values
uint32_t input_bias_shape[] = {p};
num_values = p;
float input_bias_values[s * num_values];
gen_random_float_array(num_values, input_bias_values);
size = p * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_bias_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_bias_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, MATMUL_BCAST_OP_ADDITION,
ZDNN_OK, expected_values);
}
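// Note on the manual broadcast above: input_b_values[] and input_bias_values[]
// are sized for s copies and the memcpy loops replicate the first block s-1
// times, e.g. for s=3, n=11, p=2 the 22 generated floats are copied twice
// more.  gen_test_expected_fp32_array() only understands fully stacked
// (s, n, p) / (s, p) operands, so it sees s identical stacks, while the
// ztensors themselves are still built from the single 2D/1D shapes.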
void zdnn_matmul_biasadd_3x10x11_by_3x11x2() {
zdnn_matmul_biasadd_smn_by_snp(3, 10, 11, 2);
}
void zdnn_matmul_bcast_op_3x10x11_by_11x2() {
zdnn_matmul_bcast_op_smn_by_np(3, 10, 11, 2);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_3x3_by_3x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_biasadd_3x3_by_3x3_bigger_vals);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_4x3_by_3x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_3x10x11_by_3x11x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_3x3_by_3x3_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_3x3_by_3x3_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_3x3_by_3x3_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_3x3_by_3x3_bigger_vals_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_bcast_op_3x10x11_by_11x2);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_matmul_transpose_op.c 0000664 0000000 0000000 00000210401 15000221702 0023154 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "testsupport.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void setUp(void) {
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void){}
/**
 * Helper macro that, given the indices and sizes of a multidimensional array,
 * returns the equivalent index into a flat representation of the same array.
 * The result is cast to uint64_t as that's the largest number of total
 * elements a ztensor supports, as opposed to the single-dimension maximum of
 * uint32_t.
 *
 * Note: Default usage is for 3D arrays. For 2D arrays, use 0 for the
 * undefined dimension's index and 1 for its size.
*/
#define GET_FLAT_IDX(stack, row, col, row_size, col_size) \
(uint64_t)(stack) * (row_size) * (col_size) + (row) * (col_size) + (col)
/**
* Helper function to print matmul arrays. 3D arrays are printed as separate
* stacks of 2D arrays.
*/
void print_matmul_array(uint32_t s, uint32_t r, uint32_t c, char *name,
float *arr) {
printf("Printing \"%s\" as %u stack(s) of array[%u][%u]\n", name, s, r, c);
for (uint32_t i = 0; i < s; i++) {
printf("\"%s\" stack %u\n", name, i);
for (uint32_t j = 0; j < r; j++) {
for (uint32_t k = 0; k < c; k++) {
printf("%f ", arr[GET_FLAT_IDX(i, j, k, r, c)]);
}
printf("\n");
}
}
printf("end \"%s\"\n\n", name);
}
/**
* Helper function to compute expected output tensor from randomly generated
* test input arrays.
*
* | first | second | bias | result |
* | (s, m, n) | (s, n, p) | (s, p) | (s, m, p) |
*
*/
void gen_test_expected_fp32_array(uint32_t s, uint32_t m, uint32_t n,
uint32_t p, zdnn_data_types type,
float *first, float *second, float *bias,
float *result) {
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < p; k++) { // COL of Mat 2
uint64_t result_idx = GET_FLAT_IDX(i, j, k, m, p);
uint64_t bias_idx = GET_FLAT_IDX(i, 0, k, 1, p);
float cleansed_bias = 0;
switch (type) {
case (BFLOAT):
cleansed_bias = CLEANSE_BFLOAT(bias[bias_idx]);
break;
case (FP16):
cleansed_bias = CLEANSE_FP16(bias[bias_idx]);
break;
case (FP32):
cleansed_bias = CLEANSE_FP32(bias[bias_idx]);
break;
default:
break;
}
result[result_idx] = cleansed_bias; // bias add
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("result[%u][%u][%u] = ", i, j, k);
}
for (uint32_t l = 0; l < n; l++) { // COL of Mat 1
uint64_t first_idx = GET_FLAT_IDX(i, j, l, m, n);
uint64_t second_idx = GET_FLAT_IDX(i, l, k, n, p);
float cleansed_first = 0;
float cleansed_second = 0;
switch (type) {
case (BFLOAT):
cleansed_first = CLEANSE_BFLOAT(first[first_idx]);
cleansed_second = CLEANSE_BFLOAT(second[second_idx]);
break;
case (FP16):
cleansed_first = CLEANSE_FP16(first[first_idx]);
cleansed_second = CLEANSE_FP16(second[second_idx]);
break;
case (FP32):
cleansed_first = CLEANSE_FP32(first[first_idx]);
cleansed_second = CLEANSE_FP32(second[second_idx]);
break;
default:
break;
}
result[result_idx] += cnvt_1_dlf16_to_fp32(cnvt_1_fp32_to_dlf16(
cleansed_first * cleansed_second)); // dot product
// Prints the math that generates each cell of the output.
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("(%f * %f) + ", cleansed_first, cleansed_second);
}
}
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
printf("%f = %f\n", cleansed_bias, result[result_idx]);
}
}
}
}
}
/**
* Helper function to transpose randomly generated test input arrays.
*
* (s, m, n) -> (s, n, m)
*
*/
void transpose_array(uint32_t s, uint32_t m, uint32_t n, const float *input,
float *result) {
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < n; k++) { // COL of Mat 1
uint64_t input_idx = GET_FLAT_IDX(i, j, k, m, n);
uint64_t result_idx = GET_FLAT_IDX(i, k, j, n, m);
result[result_idx] = input[input_idx];
}
}
}
}
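// A minimal worked example (hypothetical values): transposing one 2x3 matrix
//   {1, 2, 3,
//    4, 5, 6}
// via transpose_array(1, 2, 3, in, out) fills out[] with the 3x2 matrix
//   {1, 4,
//    2, 5,
//    3, 6};
// each stack is transposed independently.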
/**
* zdnn_matmul_op_test
*
* Handles all the logic to run custom tests.
*
 * shapes are interpreted as:
* - input_a = s x m x n ZDNN_3DS
* - input_b = s x n x p ZDNN_3DS
* - bias = s x p ZDNN_2DS
* - output = s x m x p ZDNN_3DS
*
*/
void zdnn_matmul_op_test(uint32_t *input_a_shape, uint32_t *input_b_shape,
uint32_t *input_bias_shape, uint32_t *output_shape,
bool is_stacked, float *input_a, float *input_b,
float *bias, bool transpose_a, bool transpose_b,
zdnn_matmul_ops op_type, zdnn_status expected_status,
float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor =
alloc_ztensor_with_values(input_a_shape, is_stacked ? ZDNN_3DS : ZDNN_2D,
test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor =
alloc_ztensor_with_values(input_b_shape, is_stacked ? ZDNN_3DS : ZDNN_2D,
test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, is_stacked ? ZDNN_2DS : ZDNN_1D, test_datatype,
NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor =
alloc_ztensor_with_values(output_shape, is_stacked ? ZDNN_3DS : ZDNN_2D,
test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
test_status = zdnn_matmul_transpose_op(input_a_ztensor, input_b_ztensor,
input_bias_ztensor, transpose_a,
transpose_b, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_transpose_op() with %d Op but "
"%08x was returned.",
expected_status, op_type, test_status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_b_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(1, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
break;
// should never get here
}
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
/**
* - MatMul BiasAdd (non-stacked)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void zdnn_matmul_biasadd_4x3_by_3x2() {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_bias_shape[] = {2};
float input_bias_values[] = {3, 3};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
float expected_values[] = {25, 31, 52, 67, 79, 103, 106, 139};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, false, false, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (non-stacked) (transpose_a)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void zdnn_matmul_biasadd_3x4_by_3x2() {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_bias_shape[] = {2};
float input_bias_values[] = {3, 3};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
float expected_values[] = {25, 31, 52, 67, 79, 103, 106, 139};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, true, false, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (non-stacked) (transpose_b)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void zdnn_matmul_biasadd_4x3_by_2x3() {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_bias_shape[] = {2};
float input_bias_values[] = {3, 3};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
float expected_values[] = {25, 31, 52, 67, 79, 103, 106, 139};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, false, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (non-stacked) (transpose_a and transpose_b)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void zdnn_matmul_biasadd_3x4_by_2x3() {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_bias_shape[] = {2};
float input_bias_values[] = {3, 3};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
float expected_values[] = {25, 31, 52, 67, 79, 103, 106, 139};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, false, input_a_values, input_b_values,
input_bias_values, true, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
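// All four bias-add variants above encode the same 4x3 * 3x2 + bias problem:
// the 3x4 and 2x3 operands are simply the transposed layouts of the base
// inputs, undone again by the transpose_a / transpose_b flags.  Hand-checking
// the first output cell: 1*1 + 2*3 + 3*5 + 3 = 25, which matches
// expected_values[0] in every variant.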
/**
* - MatMul BiasAdd (stacked)
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_biasadd_smn_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, true, input_a_values, input_b_values,
input_bias_values, false, false, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (stacked) (transpose_a)
*
* - Matrix input_a = s x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_biasadd_snm_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, n, m};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
  // transpose input so we can generate expected values
float input_at_values[num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, true, input_a_values, input_b_values,
input_bias_values, true, false, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (stacked) (transpose_b)
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_biasadd_smn_by_spn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, p, n};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
  // transpose input so we can generate expected values
float input_bt_values[num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, true, input_a_values, input_b_values,
input_bias_values, false, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul BiasAdd (stacked) (transpose_a and transpose_b)
*
* - Matrix input_a = s x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_biasadd_snm_by_spn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, n, m};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
  // transpose input so we can generate expected values
float input_at_values[num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, p, n};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
  // transpose input so we can generate expected values
float input_bt_values[num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, true, input_a_values, input_b_values,
input_bias_values, true, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
void zdnn_matmul_biasadd_3x10x11_by_3x11x2() {
zdnn_matmul_biasadd_smn_by_snp(3, 10, 11, 2);
zdnn_matmul_biasadd_snm_by_snp(3, 10, 11, 2);
zdnn_matmul_biasadd_smn_by_spn(3, 10, 11, 2);
zdnn_matmul_biasadd_snm_by_spn(3, 10, 11, 2);
}
/**
* - MatMul Compare (non-stacked)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void test_compare_4x3_by_3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values,
false, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Compare (non-stacked) (transpose_a)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void test_compare_3x4_by_3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values,
true, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Compare (non-stacked) (transpose_b)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void test_compare_4x3_by_2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values,
false, true, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Compare (non-stacked) (transpose_a and transpose_b)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 4x2
*/
void test_compare_3x4_by_2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {4, 2};
zdnn_matmul_op_test(input_a_shape, input_b_shape, input_c_shape, output_shape,
false, input_a_values, input_b_values, input_c_values,
true, true, op, ZDNN_OK, exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1};
test_compare_4x3_by_3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
}
void zdnn_matmul_compare_4x3_by_3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0};
test_compare_4x3_by_3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_3x4_by_3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_4x3_by_2x3(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_3x4_by_2x3(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
/**
* zdnn_matmul_bcast23_op_test
*
* Handles all the logic to run custom tests.
*
 * shapes are interpreted as:
* - input_a = s x m x n ZDNN_3DS
* - input_b = n x p ZDNN_2D
* - bias = p ZDNN_1D
* - output = s x m x p ZDNN_3DS
*
*/
void zdnn_matmul_bcast23_op_test(uint32_t *input_a_shape,
uint32_t *input_b_shape,
uint32_t *input_bias_shape,
uint32_t *output_shape, float *input_a,
float *input_b, float *bias, bool transpose_a,
bool transpose_b, zdnn_matmul_ops op_type,
zdnn_status expected_status,
float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, ZDNN_3DS, test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, ZDNN_2D, test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, ZDNN_1D, test_datatype, NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, ZDNN_3DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
test_status = zdnn_matmul_transpose_op(input_a_ztensor, input_b_ztensor,
input_bias_ztensor, transpose_a,
transpose_b, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_transpose_op() with %d Op but "
"%08x was returned.",
expected_status, op_type, test_status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_b_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(1, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
break;
// should never get here
}
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
/**
* - MatMul Broadcast 23 BiasAdd
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = 1 x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = 1 x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_smn_by_np(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {n, p};
num_values = n * p;
float input_b_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// manually "broadcast" those n*p entries s times across input_b_values[]
  // because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = n * p * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
// Setup Input bias using random values
uint32_t input_bias_shape[] = {p};
num_values = p;
float input_bias_values[s * num_values];
gen_random_float_array(num_values, input_bias_values);
size = p * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_bias_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_bias_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_bias_shape, output_shape,
input_a_values, input_b_values, input_bias_values, false, false,
MATMUL_BCAST_OP_ADDITION, ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 23 BiasAdd (transpose_a)
*
* - Matrix input_a = s x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = 1 x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = 1 x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_snm_by_np(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, n, m};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
  // transpose input so we can generate expected values
float input_at_values[num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {n, p};
num_values = n * p;
float input_b_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// manually "broadcast" those n*p entries s times across input_b_values[]
  // because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = n * p * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
// Setup Input bias using random values
uint32_t input_bias_shape[] = {p};
num_values = p;
float input_bias_values[s * num_values];
gen_random_float_array(num_values, input_bias_values);
size = p * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_bias_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_bias_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_bias_shape, output_shape,
input_a_values, input_b_values, input_bias_values, true, false,
MATMUL_BCAST_OP_ADDITION, ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 23 BiasAdd (transpose_b)
*
* - Matrix input_a = s x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = 1 x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = 1 x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_smn_by_pn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {p, n};
num_values = n * p;
float input_b_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// manually "broadcast" those n*p entries s times across input_b_values[]
  // because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = n * p * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
  // transpose input so we can generate expected values
float input_bt_values[s * num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {p};
num_values = p;
float input_bias_values[s * num_values];
gen_random_float_array(num_values, input_bias_values);
size = p * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_bias_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_bias_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_bias_shape, output_shape,
input_a_values, input_b_values, input_bias_values, false, true,
MATMUL_BCAST_OP_ADDITION, ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 23 BiasAdd (transpose_a and transpose_b)
*
* - Matrix input_a = s x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = 1 x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = 1 x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_snm_by_pn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, n, m};
num_values = s * m * n;
float input_a_values[num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
  // transpose input so we can generate expected values
float input_at_values[num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {p, n};
num_values = n * p;
float input_b_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// manually "broadcast" those n*p entries s times across input_b_values[]
  // because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = n * p * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
  // transpose input so we can generate expected values
float input_bt_values[s * num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {p};
num_values = p;
float input_bias_values[s * num_values];
gen_random_float_array(num_values, input_bias_values);
size = p * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_bias_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_bias_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_bias_shape, output_shape,
input_a_values, input_b_values, input_bias_values, true, true,
MATMUL_BCAST_OP_ADDITION, ZDNN_OK, expected_values);
}
void zdnn_matmul_bcast_bias_add_3x10x11_by_11x2() {
zdnn_matmul_bcast_op_smn_by_np(3, 10, 11, 2);
zdnn_matmul_bcast_op_snm_by_np(3, 10, 11, 2);
zdnn_matmul_bcast_op_smn_by_pn(3, 10, 11, 2);
zdnn_matmul_bcast_op_snm_by_pn(3, 10, 11, 2);
}
/**
* - MatMul Broadcast 23 Compare
*
* - Matrix input_a = 3x4x3 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x4x3_by_3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// manually "broadcast" those 3*2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
// manually "broadcast" those 2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test without transpose
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, false, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 23 Compare (transpose_a)
*
* - Matrix input_a = 3x3x4 -- Manually Coded Input
* - Matrix input_b = 3x2 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x3x4_by_3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// manually "broadcast" those 3*2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_b_shape[] = {3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
// manually "broadcast" those 2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_a
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, true, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 23 Compare (transpose_b)
*
* - Matrix input_a = 3x4x3 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x4x3_by_2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4, 3};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// manually "broadcast" those 2*3 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6, 1, 3, 5,
2, 4, 6, 1, 3, 5, 2, 4, 6};
// manually "broadcast" those 2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_b
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, false, true, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 23 Compare (transpose_a and transpose_b)
*
* - Matrix input_a = 3x3x4 -- Manually Coded Input
* - Matrix input_b = 2x3 -- Manually Coded Input
* - Matrix bias = 2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x3x4_by_2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 3, 4};
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// manually "broadcast" those 2*3 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_b_shape[] = {2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6, 1, 3, 5,
2, 4, 6, 1, 3, 5, 2, 4, 6};
// manually "broadcast" those 2 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint32_t input_c_shape[] = {2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_a and transpose_b
zdnn_matmul_bcast23_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, true, true, op, ZDNN_OK, exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_EQUAL, is_equal_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1};
test_compare_3x4x3_by_3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
test_compare_3x4x3_by_3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_3x3x4_by_3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_3x4x3_by_2x3(MATMUL_OP_LESSER, is_lesser_exp_vals);
test_compare_3x3x4_by_2x3(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
/**
* zdnn_matmul_bcast1_op_test
*
* Handles all the logic to run custom tests.
*
* shapes are interpreted as:
* - input_a = m x n ZDNN_2D
* - input_b = s x n x p ZDNN_3DS
* - bias = s x p ZDNN_2DS
* - output = s x m x p ZDNN_3DS
*
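* For example, zdnn_matmul_bcast_bias_add_10x11_by_3x11x2() below drives this
* test with s=3, m=10, n=11, p=2: input_a is 10x11, input_b is 3x11x2, bias
* is 3x2, and output is 3x10x2.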
*/
void zdnn_matmul_bcast1_op_test(uint32_t *input_a_shape,
uint32_t *input_b_shape,
uint32_t *input_bias_shape,
uint32_t *output_shape, float *input_a,
float *input_b, float *bias, bool transpose_a,
bool transpose_b, zdnn_matmul_ops op_type,
zdnn_status expected_status,
float *expected_values) {
/*
* Input A Tensor
*/
zdnn_ztensor *input_a_ztensor = alloc_ztensor_with_values(
input_a_shape, ZDNN_2D, test_datatype, NO_CONCAT, false, input_a);
/*
* Input B Tensor
*/
zdnn_ztensor *input_b_ztensor = alloc_ztensor_with_values(
input_b_shape, ZDNN_3DS, test_datatype, NO_CONCAT, false, input_b);
/*
* Bias Tensor
*/
zdnn_ztensor *input_bias_ztensor = alloc_ztensor_with_values(
input_bias_shape, ZDNN_2DS, test_datatype, NO_CONCAT, false, bias);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, ZDNN_3DS, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Get back zDNN test status
*/
zdnn_status test_status = GENERAL_TESTCASE_FAILURE;
test_status = zdnn_matmul_transpose_op(input_a_ztensor, input_b_ztensor,
input_bias_ztensor, transpose_a,
transpose_b, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
expected_status == test_status,
"Expected status %08x from zdnn_matmul_transpose_op() with %d Op but "
"%08x was returned.",
expected_status, op_type, test_status);
BEGIN_BLOCK_IF_LOGLEVEL_DEBUG {
int s = input_b_ztensor->transformed_desc->dim4;
int m = input_a_ztensor->transformed_desc->dim2;
int n = input_a_ztensor->transformed_desc->dim1;
int p = input_b_ztensor->transformed_desc->dim1;
print_matmul_array(1, m, n, "input_a", input_a);
print_matmul_array(s, n, p, "input_b", input_b);
print_matmul_array(s, 1, p, "bias", bias);
print_matmul_array(s, m, p, "expected_values", expected_values);
}
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(4, input_a_ztensor, input_b_ztensor, input_bias_ztensor,
output_ztensor);
}
/**
* - MatMul Broadcast 1 BiasAdd
*
* - Matrix input_a = 1 x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_mn_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {m, n};
num_values = m * n;
float input_a_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = m * n * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, false, false,
MATMUL_OP_ADDITION, ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 1 BiasAdd (transpose_a)
*
* - Matrix input_a = 1 x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x n x p --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_nm_by_snp(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {n, m};
num_values = m * n;
float input_a_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = m * n * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// transpose input so we can generate expected values
float input_at_values[s * num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_b_values, input_bias_values,
expected_values);
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, true, false, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 1 BiasAdd (transpose_b)
*
* - Matrix input_a = 1 x m x n --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_mn_by_spn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {m, n};
num_values = m * n;
float input_a_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = m * n * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// Setup Input B using random values
uint32_t input_b_shape[] = {s, p, n};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// transpose input so we can generate expected values
float input_bt_values[num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_a_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, false, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
/**
* - MatMul Broadcast 1 BiasAdd (transpose_a and transpose_b)
*
* - Matrix input_a = 1 x n x m --Randomly Generated Positive/Negative Array
* - Matrix input_b = s x p x n --Randomly Generated Positive/Negative Array
* - Matrix bias = s x p --Randomly Generated Positive Array
* - Matrix output = s x m x p
*/
void zdnn_matmul_bcast_op_nm_by_spn(uint64_t s, uint64_t m, uint64_t n,
uint64_t p) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {n, m};
num_values = m * n;
float input_a_values[s * num_values];
gen_random_float_array_pos_neg(num_values, input_a_values);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = m * n * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// transpose input so we can generate expected values
float input_at_values[s * num_values];
transpose_array(s, n, m, input_a_values, input_at_values);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, p, n};
num_values = s * n * p;
float input_b_values[num_values];
gen_random_float_array_pos_neg(num_values, input_b_values);
// transpose input so we can generate expected values
float input_bt_values[num_values];
transpose_array(s, p, n, input_b_values, input_bt_values);
// Setup Input bias using random values
uint32_t input_bias_shape[] = {s, p};
num_values = s * p;
float input_bias_values[num_values];
gen_random_float_array(num_values, input_bias_values);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
num_values = s * m * p;
float expected_values[num_values];
gen_test_expected_fp32_array(s, m, n, p, test_datatype, input_at_values,
input_bt_values, input_bias_values,
expected_values);
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_bias_shape,
output_shape, input_a_values, input_b_values,
input_bias_values, true, true, MATMUL_OP_ADDITION,
ZDNN_OK, expected_values);
}
void zdnn_matmul_bcast_bias_add_10x11_by_3x11x2() {
zdnn_matmul_bcast_op_mn_by_snp(3, 10, 11, 2);
zdnn_matmul_bcast_op_nm_by_snp(3, 10, 11, 2);
zdnn_matmul_bcast_op_mn_by_spn(3, 10, 11, 2);
zdnn_matmul_bcast_op_nm_by_spn(3, 10, 11, 2);
}
/**
* - MatMul Broadcast 1 Compare
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x3x2 -- Manually Coded Input
* - Matrix bias = 3x2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_4x3_by_3x3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
// manually "broadcast" those 4*3 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {3, 2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test without transpose
zdnn_matmul_bcast1_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, false, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 1 Compare (transpose_a)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 3x3x2 -- Manually Coded Input
* - Matrix bias = 3x2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x4_by_3x3x2(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
// manually "broadcast" those 3*4 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 3, 2};
float input_b_values[] = {1, 2, 3, 4, 5, 6, 1, 2, 3,
4, 5, 6, 1, 2, 3, 4, 5, 6};
// Setup Input bias
uint32_t input_c_shape[] = {3, 2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_a
zdnn_matmul_bcast1_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, true, false, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 1 Compare (transpose_b)
*
* - Matrix input_a = 4x3 -- Manually Coded Input
* - Matrix input_b = 3x2x3 -- Manually Coded Input
* - Matrix bias = 3x2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_4x3_by_3x2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {4, 3};
// manually "broadcast" those 4*3 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
float input_a_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6, 1, 3, 5,
2, 4, 6, 1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_c_shape[] = {3, 2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_b
zdnn_matmul_bcast1_op_test(
input_a_shape, input_b_shape, input_c_shape, output_shape, input_a_values,
input_b_values, input_c_values, false, true, op, ZDNN_OK, exp_vals);
}
/**
* - MatMul Broadcast 1 Compare (transpose_a and transpose_b)
*
* - Matrix input_a = 3x4 -- Manually Coded Input
* - Matrix input_b = 3x2x3 -- Manually Coded Input
* - Matrix bias = 3x2 -- Manually Coded Input
* - Matrix output = 3x4x2
*/
void test_compare_3x4_by_3x2x3(zdnn_matmul_ops op, float *exp_vals) {
// Setup Input A
uint32_t input_a_shape[] = {3, 4};
// manually "broadcast" those 3*4 entries 3 times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
float input_a_values[] = {1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12,
1, 4, 7, 10, 2, 5, 8, 11, 3, 6, 9, 12};
// Setup Input B
uint32_t input_b_shape[] = {3, 2, 3};
float input_b_values[] = {1, 3, 5, 2, 4, 6, 1, 3, 5,
2, 4, 6, 1, 3, 5, 2, 4, 6};
// Setup Input bias
uint32_t input_c_shape[] = {3, 2};
float input_c_values[] = {50, 100, 50, 100, 50, 100};
// Output tensor and expected values
uint32_t output_shape[] = {3, 4, 2};
// test with transpose_a and transpose_b
zdnn_matmul_bcast1_op_test(input_a_shape, input_b_shape, input_c_shape,
output_shape, input_a_values, input_b_values,
input_c_values, true, true, op, ZDNN_OK, exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater() {
float is_greater_exp_vals[] = {0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,
1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x4_by_3x3x2(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_4x3_by_3x2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
test_compare_3x4_by_3x2x3(MATMUL_OP_GREATER, is_greater_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater_equal() {
float is_greater_equal_exp_vals[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x4_by_3x3x2(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_4x3_by_3x2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
test_compare_3x4_by_3x2x3(MATMUL_OP_GREATER_EQUAL, is_greater_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_equal() {
float is_equal_exp_vals[] = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_OP_EQUAL, is_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_not_equal() {
float is_not_equal_exp_vals[] = {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1};
test_compare_4x3_by_3x3x2(MATMUL_OP_NOT_EQUAL, is_not_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser_equal() {
float is_lesser_equal_exp_vals[] = {1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_OP_LESSER_EQUAL, is_lesser_equal_exp_vals);
}
void zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser() {
float is_lesser_exp_vals[] = {1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0};
test_compare_4x3_by_3x3x2(MATMUL_OP_LESSER, is_lesser_exp_vals);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_4x3_by_3x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_3x4_by_3x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_4x3_by_2x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_3x4_by_2x3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_biasadd_3x10x11_by_3x11x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_compare_4x3_by_3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_matmul_compare_4x3_by_3x2_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_bias_add_3x10x11_by_11x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_3x4x3_by_3x2_lesser);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_bias_add_10x11_by_3x11x2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_greater_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_not_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser_equal);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_matmul_bcast_compare_4x3_by_3x3x2_lesser);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_max_elwise.c 0000664 0000000 0000000 00000012121 15000221702 0021215 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full max api. Most values in input tensor 1 are
* greater than those in input tensor 2; the last pair is smaller so both
* inputs contribute to the output.
*/
void api_max_basic() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Input 2 values as true NHWC
[[
[[1, 15], [3, 12]],
[[4, 40], [4.5, 15]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 15, 3, 12, 4, 40, 4.5, 15};
/* Expected output values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [4.5, 15]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
// test to drive input tensors with 280 values in their buffer, filled with
// randomly generated values
void api_max_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
// test to drive input tensors with 6435 values in their buffer
void api_max_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
/*
* Simple test to drive a full max api using the 3D layout
*/
void api_max_3D() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC
[[
[[1, 5], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 5, 2, 20, 4, 40, 5, 50};
/* Expected values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
/*
* Simple test to drive a full max api using 2 dimensional tensors
*/
void api_max_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC
[[
[[1, 10], [2, 20]]
]]
*/
float input1_values[] = {1, 10, 2, 20};
/* Input 2 values as true NHWC
[[
[[3, 20], [2, 5]]
]]
*/
float input2_values[] = {3, 20, 2, 5};
/* Expected values as true NHWC
[[
[[3, 20], [2, 20]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
/*
* Simple test to drive a full max api using 1 dimensional tensors
*/
void api_max_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC
[[
[[10000, 12000]]
]]
*/
float input1_values[] = {10000, 12000};
/* Input 2 values as true NHWC
[[
[[2.5, 4000]]
]]
*/
float input2_values[] = {2.5, 4000};
/* Expected values as true NHWC
[[
[[10000, 12000]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_MAX, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_max_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_meanreduce2d_pool.c 0000664 0000000 0000000 00000014517 15000221702 0022462 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_pool.h"
void setUp(void) {
tol_bfloat.ulps = 64;
tol_bfloat.epsilon_mult = (0.1 / EPSILON_BFLOAT) + 1;
tol_fp16.ulps = 64;
tol_fp16.epsilon_mult = (0.1 / EPSILON_FP16) + 1;
tol_fp32.ulps = 64 * 16384;
tol_fp32.epsilon_mult = (0.1 / EPSILON_FLOAT) + 1;
VERIFY_HW_ENV;
}
void tearDown(void) {}
void test_meanreduce2d(uint32_t *input_shape, zdnn_data_layouts input_layout,
bool repeat_first_input_value, float *input_values,
uint32_t *output_shape, zdnn_data_layouts output_layout,
zdnn_status expected_status,
bool repeat_first_expected_value,
float *expected_values) {
// Create input and output ztensors
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
input_shape, input_layout, test_datatype, NO_CONCAT,
repeat_first_input_value, input_values);
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
output_shape, output_layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
// Call public NNPA method
zdnn_status status = zdnn_meanreduce2d(input_ztensor, output_ztensor);
// Assert returned status matches expected
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_meanreduce2d to returned status %08x but expected "
"%08x\n",
status, expected_status);
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
// If expected status is ZDNN_OK, assert output values matches expected
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, repeat_first_expected_value,
expected_values, *tol);
}
// Cleanup test ztensors
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
* Simple test of basic mean reduce
*/
void zdnn_meanreduce2d_basic() {
zdnn_data_layouts layout = ZDNN_NHWC;
/* Visualization of input values
[[
[[1, 10], [2, 20], [3, 30]],
[[4, 40], [5, 50], [6, 60]],
[[7, 70], [8, 80], [9, 90]]
]]
*/
uint32_t input_shape[] = {1, 3, 3, 2};
float input_values[] = {1, 10, 2, 20, 3, 30, 4, 40, 5,
50, 6, 60, 7, 70, 8, 80, 9, 90};
/* Visualization of expected values
[[
[[5, 50]]
]]
*/
uint32_t output_shape[] = {1, 1, 1, 2};
float expected_values[] = {5, 50};
test_meanreduce2d(input_shape, layout, false, input_values, output_shape,
layout, ZDNN_OK, false, expected_values);
}
/*
* Check that we don't hit a condition code when Height and Width dimensions are
* at the largest size allowed.
*/
void zdnn_meanreduce2d_max_height_width_dims_pass() {
zdnn_data_layouts layout = ZDNN_NHWC;
uint32_t input_shape[] = {1, MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE,
MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE, 2};
// Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
uint32_t output_shape[] = {1, 1, 1, 2};
// Since all input values are the same, they should average to the same.
float *expected_values = input_values;
test_meanreduce2d(input_shape, layout, true, input_values, output_shape,
layout, ZDNN_OK, true, expected_values);
}
/*
* Check that we hit the expected condition code when height is over the
* largest size.
*/
void zdnn_meanreduce2d_over_max_height_fail() {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_max_dim is a valid tensor dimension size but is too large for a
// meanreduce dimension. This should lead to a condition code from the NNPA.
// If not, update the test constant and the API documentation.
uint32_t over_max_dim = MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, over_max_dim, 3, 2};
// Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
uint32_t output_shape[] = {1, 1, 1, 2};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
test_meanreduce2d(input_shape, layout, true, input_values, output_shape,
layout, ZDNN_FUNC_RC_F001, true, expected_values);
}
/*
* Check that we hit the expected condition code when width is over the
* largest size.
*/
void zdnn_meanreduce2d_over_max_width_fail() {
zdnn_data_layouts layout = ZDNN_NHWC;
// over_max_dim is a valid tensor dimension size but is too large for a
// meanreduce dimension. This should lead to a condition code from the NNPA.
// If not, update the test constant and the API documentation.
uint32_t over_max_dim = MAXIMUM_POOL_ZERO_STRIDES_KERNEL_SIZE + 1;
uint32_t input_shape[] = {1, 3, over_max_dim, 2};
// Just repeat the same value rather than try to generate a unique array of
// values for this test.
float input_values[] = {42};
uint32_t output_shape[] = {1, 1, 1, 2};
// Output values don't really matter as we expect failure status.
float *expected_values = input_values;
test_meanreduce2d(input_shape, layout, true, input_values, output_shape,
layout, ZDNN_FUNC_RC_F001, true, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_meanreduce2d_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(
zdnn_meanreduce2d_max_height_width_dims_pass);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_meanreduce2d_over_max_height_fail);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_meanreduce2d_over_max_width_fail);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_min_elwise.c 0000664 0000000 0000000 00000012244 15000221702 0021221 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full min api. Most values in input tensor 1 are
* greater than those in input tensor 2; the last pair is smaller so both
* inputs contribute to the output.
*/
void api_min_basic() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Input 2 values as true NHWC sized (1,2,2,2)
[[
[[1, 15], [3, 12]],
[[4, 40], [4.5, 15]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 15, 3, 12, 4, 40, 4.5, 15};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1, 15], [3, 12]],
[[4, 40], [3, 10]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
// test to drive input tensors with 280 values in their buffer, filled with
// randomly generated values
void api_min_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
// test to drive input tensors with 6435 values in their buffer
void api_min_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
/*
* Simple test to drive a full min api.
*/
void api_min_3D() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC
uint32_t shape[] = {2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC sized (1,2,2,2)
[[
[[1, 5], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC
float input2_values[] = {1, 5, 2, 20, 4, 40, 5, 50};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1, 5], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
/*
* Simple test to drive a full min api using 2 dimensional tensors
*/
void api_min_2D() {
// Values in ZDNN_NHWC
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 20]]
]]
*/
float input1_values[] = {1, 10, 2, 20};
/* Input 2 values as true NHWC sized (1,1,2,2)
[[
[[3, 20], [2, 5]]
]]
*/
float input2_values[] = {3, 20, 2, 5};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 5]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
/*
* Simple test to drive a full min api using 1 dimensional tensors
*/
void api_min_1D() {
// Values in ZDNN_NHWC
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[10000, 12000]]
]]
*/
float input1_values[] = {10000, 12000};
/* Input 2 values as true NHWC sized (1,1,2,2)
[[
[[2.5, 4000]]
]]
*/
float input2_values[] = {2.5, 4000};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[2.5, 4000]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_MIN, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_min_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_moments.c 0000664 0000000 0000000 00000013325 15000221702 0020551 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/**
* zdnn_moments_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_moments_test(uint32_t *i_dims, uint32_t *o_a_dims, uint32_t *o_b_dims,
zdnn_data_layouts layout, float *input_a,
uint32_t bessel_correction, zdnn_status expected_status,
float *expected_values_a, float *expected_values_b) {
/*
* Input Tensor a
*/
zdnn_ztensor *input_ztensor_a = alloc_ztensor_with_values(
i_dims, layout, test_datatype, NO_CONCAT, false, input_a);
/*
* Output Tensor a
*/
zdnn_ztensor *output_ztensor_a = alloc_ztensor_with_values(
o_a_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Output Tensor b
*/
zdnn_ztensor *output_ztensor_b = alloc_ztensor_with_values(
o_b_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_moments(input_ztensor_a, bessel_correction,
output_ztensor_a, output_ztensor_b);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_moments() to returned status %08x but expected %08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor_a, false, expected_values_a);
assert_ztensor_values(output_ztensor_b, false, expected_values_b);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(3, input_ztensor_a, output_ztensor_a, output_ztensor_b);
}
// Calculate values to approximate zDNN moments (mean and variance)
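// A sketch of the math computed below, with L = the total number of input
// elements (product of the four input dims):
//   mean     = (sum of x_i) / L
//   variance = (sum of x_i^2 - (sum of x_i)^2 / L) / (L - bessel_correction)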
void generate_moments_output(const float input_values[],
const uint32_t input_shape[],
uint32_t bessel_correction, int num_values,
float expected_values_a[],
float expected_values_b[]) {
uint64_t l =
input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3];
float summation = 0.0;
float summation_sq = 0.0;
for (int i = 0; i < num_values; i++) {
summation += input_values[i];
summation_sq += powf(input_values[i], 2);
}
expected_values_a[0] = summation / l;
expected_values_b[0] =
(summation_sq - (powf(summation, 2) / l)) / (l - bessel_correction);
}
void zdnn_moments_basic_small_nhwc_pos() {
uint32_t shape_i[] = {1, 5, 12, 1};
uint32_t shape_o_a[] = {1, 1, 1, 1};
uint32_t shape_o_b[] = {1, 1, 1, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
uint32_t bessel_correction = 0;
float expected_values_a[shape_o_a[3]];
float expected_values_b[shape_o_b[3]];
generate_moments_output(input_values, shape_i, bessel_correction,
num_io_buffer_values, expected_values_a,
expected_values_b);
zdnn_moments_test(shape_i, shape_o_a, shape_o_b, ZDNN_NHWC, input_values,
bessel_correction, ZDNN_OK, expected_values_a,
expected_values_b);
}
void zdnn_moments_basic_large_nhwc_pos() {
uint32_t shape_i[] = {1, 56, 70, 3};
uint32_t shape_o_a[] = {1, 1, 1, 1};
uint32_t shape_o_b[] = {1, 1, 1, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
uint32_t bessel_correction = 0;
float expected_values_a[shape_o_a[3]];
float expected_values_b[shape_o_b[3]];
generate_moments_output(input_values, shape_i, bessel_correction,
num_io_buffer_values, expected_values_a,
expected_values_b);
zdnn_moments_test(shape_i, shape_o_a, shape_o_b, ZDNN_NHWC, input_values,
bessel_correction, ZDNN_OK, expected_values_a,
expected_values_b);
}
void zdnn_moments_basic_large_nhwc_pos_neg() {
uint32_t shape_i[] = {1, 40, 30, 20};
uint32_t shape_o_a[] = {1, 1, 1, 1};
uint32_t shape_o_b[] = {1, 1, 1, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
uint32_t bessel_correction = 1;
float expected_values_a[shape_o_a[3]];
float expected_values_b[shape_o_b[3]];
generate_moments_output(input_values, shape_i, bessel_correction,
num_io_buffer_values, expected_values_a,
expected_values_b);
zdnn_moments_test(shape_i, shape_o_a, shape_o_b, ZDNN_NHWC, input_values,
bessel_correction, ZDNN_OK, expected_values_a,
expected_values_b);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_moments_basic_small_nhwc_pos);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_moments_basic_large_nhwc_pos);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_moments_basic_large_nhwc_pos_neg);
UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_mul_elwise.c 0000664 0000000 0000000 00000011705 15000221702 0021234 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full mul api.
*/
void api_mul_basic() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC
[[
[[1, 10], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 10, 2, 20, 4, 40, 5, 50};
/* Expected values as true NHWC
[[
[[3, 300], [12, 1200]],
[[32, 3200], [45, 4500]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
// test to drive input tensors with 280 values in their buffer.
void api_mul_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
// test to drive input tensors with 6435 values in their buffer
void api_mul_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
/*
* Simple test to drive a full mul api.
*/
void api_mul_3D() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC
[[
[[1, 5], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 5, 2, 20, 4, 40, 5, 50};
// Create ztensor with input1_values
/* Expected values as true NHWC
[[
[[3, 150], [12, 1200]],
[[32, 3200], [45, 4500]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
/*
* Simple test to drive a full mul api using 2 dimensional tensors
*/
void api_mul_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 20]]
]]
*/
float input1_values[] = {1, 10, 2, 20};
/* Input 2 values as true NHWC sized (1,1,2,2)
[[
[[3, 20], [2, 5]]
]]
*/
float input2_values[] = {3, 20, 2, 5};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[3, 200], [4, 100]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
/*
* Simple test to drive a full mul api using 1 dimensional tensors
*/
void api_mul_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[8, 12]]
]]
*/
float input1_values[] = {8, 12};
/* Input 2 values as true NHWC sized (1,1,2,2)
[[
[[2.5, 4000]]
]]
*/
float input2_values[] = {2.5, 4000};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[20, 48000]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_MUL, ZDNN_OK);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_mul_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_norm.c 0000664 0000000 0000000 00000020736 15000221702 0020046 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
tol_bfloat.ulps = MAX_ULPS_BFLOAT;
tol_bfloat.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
// note: zdnn_norm_basic_large_nhwc (FP16) needs custom tolerance
tol_fp16.ulps = MAX_ULPS_FP16;
tol_fp16.epsilon_mult = (0.25 / EPSILON_FP16) + 1;
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = MAX_EPSILON_MULT_FLOAT;
}
void tearDown(void) {}
/**
* zdnn_norm_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_norm_test(uint32_t *i_dims, uint32_t *o_dims,
zdnn_data_layouts layout, float *input_a, float *input_b,
zdnn_status expected_status, float *expected_values) {
/*
* Input Tensor a
*/
zdnn_ztensor *input_ztensor_a = alloc_ztensor_with_values(
i_dims, layout, test_datatype, NO_CONCAT, false, input_a);
/*
* Input Tensor b
*/
zdnn_ztensor *input_ztensor_b = alloc_ztensor_with_values(
i_dims, layout, test_datatype, NO_CONCAT, false, input_b);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
o_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status =
zdnn_norm(input_ztensor_a, input_ztensor_b, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_norm() to returned status %08x but expected %08x\n",
status, expected_status);
// To allow for unique tolerance
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(3, input_ztensor_a, input_ztensor_b, output_ztensor);
}
// Calculate values to approximate zDNN Norm
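// For each (n, h, w) position, the loops below compute the Euclidean distance
// across the channel dimension:
//   expected[w] = sqrt( sum over c of (a[n,h,w,c] - b[n,h,w,c])^2 )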
void approximate_norm(const float input_a_values[],
const float input_b_values[], float expected_values[],
const uint32_t shape_i[], uint32_t input_shape_size) {
// Check whether we were passed all the dims we need. For a 3D shape we can
// assume N = 1; for a 4D shape, N comes from the first dimension.
uint32_t N, H, W, C;
if (input_shape_size > 3) {
N = shape_i[0];
H = shape_i[1];
W = shape_i[2];
C = shape_i[3];
} else {
N = 1;
H = shape_i[0];
W = shape_i[1];
C = shape_i[2];
}
for (uint32_t n = 0; n < N; n++) {
for (uint32_t h = 0; h < H; h++) {
for (uint32_t w = 0; w < W; w++) {
float sum = 0.0;
for (uint32_t c = 0; c < C; c++) {
// The expression n * H * W * C + h * W * C + w * C + c calculates the
// index into a 1D array that represents a 4D (NHWC) tensor. This is
// "flattening" a multi-dimensional position into a 1D offset.
//
// The n * H * W * C term:
// Each batch n spans H * W * C elements.
//
// The h * W * C term:
// Within a batch, each height position h spans W * C elements.
//
// The w * C term:
// Within a row, each width position w spans C elements.
//
// The c term:
// The channel position within the (n, h, w) location.
//
// Summing the terms gives the 1D offset of element (n, h, w, c) in the
// flattened tensor.
uint32_t index = n * H * W * C + h * W * C + w * C + c;
sum += powf(input_a_values[index] - input_b_values[index], 2);
}
// index into the flattened (N x H x W) output; the norm is taken along C
expected_values[n * H * W + h * W + w] = sqrtf(sum);
}
}
}
}
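/*
 * Illustrative example (derived from zdnn_norm_basic_small_3d below, not
 * additional test data): with input_a = {1..6} and input_b = {0..5}, every
 * element of (a - b) is 1, so the sum of squares along C is 6 and the
 * expected norm is sqrtf(6), roughly 2.449.
 */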
/*
-------------------------------------------------------------------------------
Norm Basic
Layout: 3D
-------------------------------------------------------------------------------
*/
void zdnn_norm_basic_small_3d() {
uint32_t shape_i[] = {1, 1, 6};
uint32_t shape_o[] = {1, 1, 1};
float input_a_values[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
float input_b_values[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
float expected_values[shape_o[1]];
approximate_norm(input_a_values, input_b_values, expected_values, shape_i,
sizeof(shape_i) / sizeof(shape_i[0]));
zdnn_norm_test(shape_i, shape_o, ZDNN_3D, input_a_values, input_b_values,
ZDNN_OK, expected_values);
}
void zdnn_norm_basic_large_3d__pos_neg() {
uint32_t shape_i[] = {1, 10, 70};
uint32_t shape_o[] = {1, 10, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2];
float input_a_values[num_io_buffer_values];
float input_b_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_a_values);
gen_random_float_array_pos_neg(num_io_buffer_values, input_b_values);
float expected_values[shape_o[1]];
approximate_norm(input_a_values, input_b_values, expected_values, shape_i,
sizeof(shape_i) / sizeof(shape_i[0]));
zdnn_norm_test(shape_i, shape_o, ZDNN_3D, input_a_values, input_b_values,
ZDNN_OK, expected_values);
}
void zdnn_norm_basic_large_3d_neg() {
uint32_t shape_i[] = {1, 10, 70};
uint32_t shape_o[] = {1, 10, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2];
float input_a_values[num_io_buffer_values];
float input_b_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_a_values);
gen_random_float_array_neg(num_io_buffer_values, input_b_values);
float expected_values[shape_o[1]];
approximate_norm(input_a_values, input_b_values, expected_values, shape_i,
sizeof(shape_i) / sizeof(shape_i[0]));
zdnn_norm_test(shape_i, shape_o, ZDNN_3D, input_a_values, input_b_values,
ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Norm Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
void zdnn_norm_basic_small_nhwc() {
uint32_t shape_i[] = {1, 1, 2, 6};
uint32_t shape_o[] = {1, 1, 2, 1};
float input_a_values[] = {1, 2, 3, 4, 5, 6, 5, 10, 15, 20, 25, 30};
float input_b_values[] = {0, 1, 2, 3, 4, 5, 35, 40, 45, 50, 55, 60};
float expected_values[shape_o[2]];
approximate_norm(input_a_values, input_b_values, expected_values, shape_i,
sizeof(shape_i) / sizeof(shape_i[0]));
zdnn_norm_test(shape_i, shape_o, ZDNN_NHWC, input_a_values, input_b_values,
ZDNN_OK, expected_values);
}
void zdnn_norm_basic_large_nhwc() {
// Initialize the dimensions for our ZDNN_NHWC input tensor
uint32_t shape_i[] = {1, 1, 70, 180};
uint32_t shape_o[] = {1, 1, 70, 1};
int num_io_buffer_values = shape_i[0] * shape_i[1] * shape_i[2] * shape_i[3];
float input_a_values[num_io_buffer_values];
float input_b_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_a_values);
gen_random_float_array_neg(num_io_buffer_values, input_b_values);
float expected_values[shape_o[2]];
approximate_norm(input_a_values, input_b_values, expected_values, shape_i,
sizeof(shape_i) / sizeof(shape_i[0]));
zdnn_norm_test(shape_i, shape_o, ZDNN_NHWC, input_a_values, input_b_values,
ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_norm_basic_small_3d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_norm_basic_large_3d__pos_neg);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_norm_basic_large_3d_neg);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_norm_basic_small_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_norm_basic_large_nhwc);
UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_quantized_matmul_op.c 0000664 0000000 0000000 00000424234 15000221702 0023155 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_quantization.h"
#include "testsupport.h"
#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/******************************************************************************
default_input
******************************************************************************/
uint32_t default_input_shape[] = {2, 2, 4};
uint32_t bcast_input_shape[] = {2, 4};
/* Visualization of values in shape (s, m, n) order
[[[-1.2135693 28.734085 8.497408 -1.9210271]
[-23.742136 16.26094 -21.234303 60.51914]],
[[-1.2135693 28.734085 8.497408 -1.9210271]
[-23.742136 16.26094 -21.234303 60.51914]]]
*/
float default_input_values[] = {-1.2135693, 28.734085, 8.497408, -1.9210271,
-23.742136, 16.26094, -21.234303, 60.51914,
-1.2135693, 28.734085, 8.497408, -1.9210271,
-23.742136, 16.26094, -21.234303, 60.51914};
float default_input_min = -100.f;
float default_input_max = 80.f;
float default_input_scale = 0.70588235294f; // (80.0 - -100.0) / 255.0
float default_input_offset = 14.f;
bool default_disable_clipping = false;
/*
a Quantized:
[[[ 12 55 26 11]
[-20 37 -16 100]],
[[ 12 55 26 11]
[-20 37 -16 100]]]
a Dequantized:
[[[ -1.4117647 28.941177 8.470589 -2.1176472]
[-24. 16.235294 -21.17647 60.705883 ]],
[[ -1.4117647 28.941177 8.470589 -2.1176472]
[-24. 16.235294 -21.17647 60.705883 ]]]
*/
/******************************************************************************
default_weights
******************************************************************************/
uint32_t default_weights_shape[] = {2, 4, 3};
uint32_t bcast_weights_shape[] = {4, 3};
/* Visualization of weights values in shape (s, n, p) order
[[[ 8.909883 -8.496755 3.7517512]
[-4.1331525 -2.9586632 7.767899]
[-17.868917 -17.386122 -19.393448]
[ 4.9785953 3.3447025 6.1003647]],
[[ 8.909883 -8.496755 3.7517512]
[-4.1331525 -2.9586632 7.767899]
[-17.868917 -17.386122 -19.393448]
[ 4.9785953 3.3447025 6.1003647]]]
*/
float default_weights_values[] = {
8.909883, -8.496755, 3.7517512, -4.1331525, -2.9586632, 7.767899,
-17.868917, -17.386122, -19.393448, 4.9785953, 3.3447025, 6.1003647,
8.909883, -8.496755, 3.7517512, -4.1331525, -2.9586632, 7.767899,
-17.868917, -17.386122, -19.393448, 4.9785953, 3.3447025, 6.1003647};
float default_weights_min = -20.f;
float default_weights_max = 10.f;
float default_weights_scale = 0.11764705882f; // (10.0 - -20.0) / 255.0
float default_weights_offset = 42.f;
float symmetric_weights_min = -20.f;
float symmetric_weights_max = 20.f;
float symmetric_weights_scale = 0.15686274509f; // (20.0 - -20.0) / 255.0
float symmetric_weights_offset = 0.f;
/*
b Quantized:
[[[ 118 -30 74]
[ 7 17 108]
[-110 -106 -123]
[ 84 70 94]],
[[ 118 -30 74]
[ 7 17 108]
[-110 -106 -123]
[ 84 70 94]]]
b Dequantized:
[[[ 8.941176 -8.470589 3.764706 ]
[ -4.117647 -2.9411764 7.7647057]
[-17.882353 -17.411764 -19.411764 ]
[ 4.9411764 3.2941177 6.117647 ]],
[[ 8.941176 -8.470589 3.764706 ]
[ -4.117647 -2.9411764 7.7647057]
[-17.882353 -17.411764 -19.411764 ]
[ 4.9411764 3.2941177 6.117647 ]]]
*/
/******************************************************************************
default_biases
******************************************************************************/
uint32_t default_biases_shape[] = {2, 3};
uint32_t bcast_biases_shape[] = {3};
/* Visualization of bias values in shape (s, p) order
[[478.61835 299.15857 -38.520638],
[478.61835 299.15857 -38.520638]]
*/
float default_biases_values[] = {478.61835, 299.15857, -38.520638,
478.61835, 299.15857, -38.520638};
float default_biases_min = -500.f;
float default_biases_max = 500.f;
float default_biases_scale = 3.92156862745f; // (500.0 - -500.0) / 255.0
float default_biases_offset = 0.f;
/*
c Quantized:
[[122 76 -10],
[122 76 -10]]
c Dequantized:
[[478.43137 298.0392 -39.215687],
[478.43137 298.0392 -39.215687]]
*/
/******************************************************************************
default_output
******************************************************************************/
uint32_t default_output_shape[] = {2, 2, 3};
/*
Expected qc_tilde:
[28.6345098 20.96784314 6.6345098]
Expected qy_hw:
[[28.15803922 15.98784314 23.09568627]
[57.07803922 55.9972549 55.63686275]]
Expected qy_sw:
[[20.30823529 12.99529412 22.97647059]
[19.86352941 12.55058824 22.53176471]]
Expected qy:
[[ 7.84980392 2.99254902 0.11921569]
[37.2145098 43.44666667 33.10509804]]
Expected y Quantized:
[[ 8 3 0]
[37 44 33]]
Expected y Dequantized:
[[ 188.23529 70.588234 0. ]
[ 870.58826 1011.7647 776.4706 ]]
*/
/*
Expected Symmetric qc_tilde:
[20.33333333 12.66666667 -1.66666667]
Expected Symmetric qy_hw:
[[ 7.81568627 2.95163399 0.21568627]
[37.51503268 43.17647059 33.26666667]]
Expected Symmetric qy_sw:
[[0. 0. 0.]
[0. 0. 0.]]
Expected Symmetric qy:
[[ 7.81568627 2.95163399 0.21568627]
[37.51503268 43.17647059 33.26666667]]
Expected Symmetric y Quantized:
[[ 8 3 0]
[37 44 33]]
Expected Symmetric y Dequantized:
[[ 188.23529 70.588234 0. ]
[ 870.58826 1011.7647 776.4706 ]]
*/
/******************************************************************************
Unity Methods
******************************************************************************/
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/******************************************************************************
Helper Methods
******************************************************************************/
/// Allocates a 4k aligned work area buffer based on the given size and returns
/// a pointer to the memory.
///
/// \param[in] work_area_size size in bytes required for the work area
///
/// \return pointer to the work area buffer or throws test failure
///
void *alloc_quantized_matmul_work_area(size_t work_area_size) {
void *work_area = NULL;
if (!(work_area = malloc_aligned_4k(work_area_size))) {
TEST_FAIL_MESSAGE_FORMATTED("malloc_aligned_4k (%zu) failed",
work_area_size);
}
memset(work_area, 0, work_area_size);
return work_area;
}
/// Generates and fills the passed scale and offset for the passed min and max.
///
/// \param[in] min the min float value of the range
/// \param[in] max the max float value of the range
/// \param[in] scale pointer to a float that will store the computed scale
/// \param[in] offset pointer to a float that will store the computed offset
///
void gen_scale_and_offset(float min, float max, float *scale, float *offset) {
*scale = (max - min) / 255.f;
int zero_point = (int)((max * -128.f - min * 127.f) / (max - min));
*offset = (float)(zero_point);
}
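/*
 * Worked example (a sketch using the symmetric weights range defined above,
 * not new test data): for min = -20.f and max = 20.f this yields
 *   scale      = (20 - -20) / 255 = 0.15686...  (symmetric_weights_scale)
 *   zero_point = (20 * -128 - -20 * 127) / 40 = -0.5, truncated to 0
 *   offset     = 0.f                            (symmetric_weights_offset)
 */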
/**
* Helper function to compute expected output tensor from randomly generated
* test input arrays.
*
* | first | second | bias | result |
* | (s, m, n) | (s, n, p) | (s, p) | (s, m, p) |
*
* The idea is to "cleanse" inputs by quantizing them and then dequantizing them
* to give us float values representative of the quantized values. We can then
* perform a standard matrix multiplication and quantize the output. This will
* match the output of a quantized matrix multiplication call.
*
* Note that this method only matches when there is no precision loss. We do
* however have precision loss since the computed bias gets converted to DLFloat16.
* This means results may vary slightly, especially since they are rounded.
*/
void gen_test_expected_fp32_array(uint32_t s, uint32_t m, uint32_t n,
uint32_t p, const float *first,
const float *second, const float *bias,
float Sa, float Za, float Sb, float Zb,
float Sc, float Zc, float *result, float *Sy,
float *Zy, zdnn_matmul_ops op_type) {
float min_result = FLT_MAX;
float max_result = -FLT_MAX;
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < p; k++) { // COL of Mat 2
uint64_t result_idx = GET_FLAT_IDX(i, j, k, m, p);
uint64_t bias_idx = GET_FLAT_IDX(i, 0, k, 1, p);
float cleansed_bias = CLEANSE_QUANTIZED(bias[bias_idx], Sc, Zc);
result[result_idx] = op_type == MATMUL_OP_ADDITION ? cleansed_bias : 0;
for (uint32_t l = 0; l < n; l++) { // COL of Mat 1
uint64_t first_idx = GET_FLAT_IDX(i, j, l, m, n);
uint64_t second_idx = GET_FLAT_IDX(i, l, k, n, p);
float cleansed_first = CLEANSE_QUANTIZED(first[first_idx], Sa, Za);
float cleansed_second = CLEANSE_QUANTIZED(second[second_idx], Sb, Zb);
result[result_idx] += (cleansed_first * cleansed_second);
}
min_result = MIN(min_result, result[result_idx]);
max_result = MAX(max_result, result[result_idx]);
switch (op_type) {
case MATMUL_OP_GREATER:
result[result_idx] = result[result_idx] > cleansed_bias ? 1.f : 0.f;
break;
case MATMUL_OP_GREATER_EQUAL:
result[result_idx] = result[result_idx] >= cleansed_bias ? 1.f : 0.f;
break;
case MATMUL_OP_EQUAL:
result[result_idx] = result[result_idx] == cleansed_bias ? 1.f : 0.f;
break;
case MATMUL_OP_NOT_EQUAL:
result[result_idx] = result[result_idx] != cleansed_bias ? 1.f : 0.f;
break;
case MATMUL_OP_LESSER_EQUAL:
result[result_idx] = result[result_idx] <= cleansed_bias ? 1.f : 0.f;
break;
case MATMUL_OP_LESSER:
result[result_idx] = result[result_idx] < cleansed_bias ? 1.f : 0.f;
break;
default:
break;
}
}
}
}
// Generate output scale and offset based on min and max result
gen_scale_and_offset(min_result, max_result, Sy, Zy);
// When op_type is MATMUL_OP_ADDITION we quantize the output so it matches the
// returned output.
if (op_type == MATMUL_OP_ADDITION) {
for (uint32_t i = 0; i < s; i++) { // MATRIX from stack
for (uint32_t j = 0; j < m; j++) { // ROW of Mat 1
for (uint32_t k = 0; k < p; k++) { // COL of Mat 2
uint64_t result_idx = GET_FLAT_IDX(i, j, k, m, p);
result[result_idx] = QUANTIZE(result[result_idx], *Sy, *Zy);
}
}
}
}
}
/// Computes the folded bias to be passed to quantized matmul call when
/// operation is MATMUL_OP_ADDITION. Zb should be equal to 0, meaning the
/// correction term for input_a is also equal to 0. This allows the correction
/// term for input_b to be folded into qc_tilde, which removes the need for
/// correction being applied after the quantized matmul call.
///
/// The original equation is:
///
/// qc_tilde = Zy - (Sc / Sy) * Zc + (Sc / Sy) * q_c[j]
///
/// Since input_c is not quantized, we need to replace q_c with the equation
/// to quantize input_c.
///
/// q_c[j] = QUANTIZE(input_c[j], Sc, Zc)
/// qc_tilde = Zy - (Sc / Sy) * Zc + (Sc / Sy) * q_c[j]
///
/// The original equation for the correction term for input_b is:
///
/// M = (Sa * Sb) / Sy
/// term_b = M * Za * sum(q_b[:,j])
///
/// Since input_b is not quantized, we need to replace q_b with the equation
/// to quantize input_b.
///
/// M = (Sa * Sb) / Sy
/// term_b = M * Za * sum(QUANTIZE(input_b[:,j], Sb, Zb))
///
/// This gives us the final equation:
///
/// q_c[j] = QUANTIZE(input_c[j], Sc, Zc)
/// M = (Sa * Sb) / Sy
/// term_b = M * Za * sum(QUANTIZE(input_b[:,j], Sb, Zb))
/// qc_tilde[j] = Zy - (Sc / Sy) * Zc + (Sc / Sy) * q_c[j] - term_b
void pre_compute_folded_bias(const uint32_t s, const uint32_t n,
const uint32_t p, const float *input_b_data,
const float *input_c_data, const float Sa,
const float Za, const float Sb, const float Sc,
const float Zc, const float Sy, const float Zy,
float *output_data) {
const float M = (Sa * Sb) / Sy;
for (uint32_t i = 0; i < s; i++) {
for (uint32_t j = 0; j < p; j++) {
float sum_b = 0;
for (uint32_t k = 0; k < n; k++) {
uint64_t second_idx = GET_FLAT_IDX(i, k, j, n, p);
sum_b += QUANTIZE(input_b_data[second_idx], Sb, 0);
}
const float term_b = M * Za * sum_b;
uint64_t bias_idx = GET_FLAT_IDX(i, 0, j, 1, p);
const float q_c = QUANTIZE(input_c_data[bias_idx], Sc, Zc);
output_data[bias_idx] = Zy - (Sc / Sy) * Zc + (Sc / Sy) * q_c - term_b;
}
}
}
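/*
 * Sanity check (an observation on the formula above, not additional test
 * logic): with Sa = Sb = Sc = Sy = 1 and Za = Zc = Zy = 0, the correction
 * term_b vanishes (Za == 0) and qc_tilde[j] reduces to q_c[j], i.e. the
 * quantized bias passes through unchanged.
 */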
/// Computes the bias to be passed to quantized matmul call when operation is
/// not MATMUL_OP_ADDITION.
///
/// The original equation for qc_tilde is:
///
/// qc_tilde = Sc / (Sa * Sb) * (q_c[j] - Zc) + Za * sum(q_b[:,j])
///
/// Since input_c is not quantized, we need to replace q_c with the equation
/// to quantize input_c.
///
/// q_c[j] = QUANTIZE(input_c[j], Sc, Zc)
/// qc_tilde = Sc / (Sa * Sb) * (q_c[j] - Zc) + Za * sum(q_b[:,j])
///
/// Since input_b is not quantized, we need to replace q_b with the equation
/// to quantize input_b.
///
/// q_c[j] = QUANTIZE(input_c[j], Sc, Zc)
/// term_b = Za * sum(QUANTIZE(input_b[:,j], Sb, 0))
/// qc_tilde = Sc / (Sa * Sb) * (q_c[j] - Zc) + term_b
void pre_compute_comparison_bias(const uint32_t s, const uint32_t n,
const uint32_t p, const float *input_b_data,
const float *input_c_data, const float Sa,
const float Za, const float Sb, const float Sc,
const float Zc, const float Sy, const float Zy,
float *output_data) {
const float scale = Sc / (Sa * Sb);
for (uint64_t i = 0; i < s; i++) {
for (uint64_t j = 0; j < p; j++) {
float sum_b = 0;
for (uint32_t k = 0; k < n; k++) {
uint64_t second_idx = GET_FLAT_IDX(i, k, j, n, p);
sum_b += QUANTIZE(input_b_data[second_idx], Sb, 0);
}
const float term_b = Za * sum_b;
uint64_t bias_idx = GET_FLAT_IDX(i, 0, j, 1, p);
const float q_c = QUANTIZE(input_c_data[bias_idx], Sc, Zc);
output_data[bias_idx] = scale * (q_c - Zc) + term_b;
}
}
}
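/*
 * Sanity check (an observation on the formula above, not additional test
 * logic): with Sa = Sb = Sc = 1 and Za = Zc = 0, term_b vanishes and
 * qc_tilde[j] again reduces to q_c[j], consistent with the folded-bias
 * case above.
 */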
/// Call public API and checks returned status matches expected status. If OK
/// status expected, confirm actual output values match expected values.
///
/// \param[in] exp_status Expected status for the public API call
///
/// \return nothing but throws test failure if values don't match
/// expected or an unexpected failure prevents the test from completing.
///
void test_zdnn_api_quantized_matmul(
uint32_t *input_shape, zdnn_data_layouts input_layout, float *input_values,
float a_scale, float a_offset, int8_t clip_min, int8_t clip_max,
uint32_t *input_weights_shape, zdnn_data_layouts input_weights_layout,
float *input_weights_values, float b_scale, float b_offset,
uint32_t *input_biases_shape, zdnn_data_layouts input_biases_layout,
float *input_biases_values, float c_scale, float c_offset,
uint32_t *out_shape, zdnn_data_layouts out_layout,
zdnn_matmul_ops op_type, bool on_the_fly, zdnn_status exp_status,
bool disable_clipping) {
// Run test for each pretransformed data type
zdnn_ztensor *input, *weights, *biases;
if (on_the_fly) {
input = alloc_ztensor_with_values(input_shape, input_layout, FP32,
NO_CONCAT, false, input_values);
input->rec_scale = 1.f / a_scale;
input->offset = a_offset;
} else {
input = alloc_quantized_ztensor_with_values(input_shape, input_layout, FP32,
QUANTIZED_INT8, input_values,
a_scale, a_offset);
}
weights = alloc_quantized_ztensor_with_values(
input_weights_shape, input_weights_layout, INT8, QUANTIZED_WEIGHTS_INT8,
input_weights_values, b_scale, b_offset);
biases = alloc_quantized_ztensor_with_values(
input_biases_shape, input_biases_layout, FP32, QUANTIZED_INT8,
input_biases_values, c_scale, c_offset);
// Generate expected output values
uint32_t s = out_shape[0];
uint32_t m = out_shape[1];
uint32_t n = input->transformed_desc->dim1;
uint32_t p = out_shape[2];
float *exp_out_values = malloc(s * m * p * sizeof(float));
float y_scale, y_offset;
gen_test_expected_fp32_array(s, m, n, p, input_values, input_weights_values,
input_biases_values, a_scale, a_offset, b_scale,
b_offset, c_scale, c_offset, exp_out_values,
&y_scale, &y_offset, op_type);
// Run API once with NULL work_area and again with work_area set.
for (int work_area_pass = 0; work_area_pass < 2; work_area_pass++) {
zdnn_ztensor *out;
out = alloc_quantized_ztensor_with_values(out_shape, out_layout, FP32,
QUANTIZED_DLFLOAT16, NULL,
y_scale, y_offset);
void *work_area = NULL;
// Set work_area during second pass
if (work_area_pass == 1) {
work_area = alloc_quantized_matmul_work_area(biases->buffer_size);
}
zdnn_status status;
status = zdnn_quantized_matmul_op(input, weights, biases, op_type, clip_min,
clip_max, disable_clipping, false, false,
work_area, out);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"work_area_pass %d call to %s() returned "
"status %08x \"%s\" but expected %08x \"%s\"",
work_area_pass, "zdnn_quantized_matmul_op",
status, zdnn_get_status_message(status),
exp_status,
zdnn_get_status_message(exp_status));
// Confirm output tensor values match expected values
if (exp_status == ZDNN_OK) {
if (op_type == MATMUL_OP_ADDITION) {
assert_quantized_ztensor_values(out, false, exp_out_values);
} else {
assert_quantized_ztensor_compare_values(out, false, exp_out_values);
}
}
// Reset output buffer
memset(out->buffer, 0, out->buffer_size);
// dequantize=true
status = zdnn_quantized_matmul_op(input, weights, biases, op_type, clip_min,
clip_max, disable_clipping, true, false,
work_area, out);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"work_area_pass %d call to %s() returned "
"status %08x \"%s\" but expected %08x \"%s\"",
work_area_pass, "zdnn_quantized_matmul_op",
status, zdnn_get_status_message(status),
exp_status,
zdnn_get_status_message(exp_status));
// Confirm output tensor values match expected values
if (exp_status == ZDNN_OK) {
if (op_type == MATMUL_OP_ADDITION) {
assert_dequantized_ztensor_values(out, false, exp_out_values);
} else {
assert_quantized_ztensor_compare_values(out, false, exp_out_values);
}
}
// Free the work_area allocated for the second pass
if (work_area_pass == 1) {
free_aligned_4k(work_area);
}
free_ztensor_buffers(1, out);
} // end of work_area_pass loop
// Free expected output values
free(exp_out_values);
// Free input tensors
free_ztensor_buffers(3, input, weights, biases);
}
/// Call public API and checks returned status matches expected status. If OK
/// status expected, confirm actual output values match expected values.
///
/// \param[in] exp_status Expected status for the public API call
///
/// \return nothing but throws test failure if values don't match
/// expected or an unexpected failure prevents the test from completing.
///
void test_zdnn_api_quantized_matmul_pre_computed(
uint32_t *input_shape, zdnn_data_layouts input_layout, float *input_values,
float a_scale, float a_offset, int8_t clip_min, int8_t clip_max,
uint32_t *input_weights_shape, zdnn_data_layouts input_weights_layout,
float *input_weights_values, float b_scale, float b_offset,
uint32_t *input_biases_shape, zdnn_data_layouts input_biases_layout,
float *input_biases_values, float c_scale, float c_offset,
uint32_t *out_shape, zdnn_data_layouts out_layout,
zdnn_matmul_ops op_type, bool on_the_fly, zdnn_status exp_status) {
// Run test for each pretransformed data type
zdnn_ztensor *input, *weights, *biases;
if (on_the_fly) {
input = alloc_ztensor_with_values(input_shape, input_layout, FP32,
NO_CONCAT, false, input_values);
input->rec_scale = 1.f / a_scale;
input->offset = a_offset;
} else {
input = alloc_quantized_ztensor_with_values(input_shape, input_layout, FP32,
QUANTIZED_INT8, input_values,
a_scale, a_offset);
}
weights = alloc_quantized_ztensor_with_values(
input_weights_shape, input_weights_layout, INT8, QUANTIZED_WEIGHTS_INT8,
input_weights_values, b_scale, b_offset);
// Generate expected output values
uint32_t s = out_shape[0];
uint32_t m = out_shape[1];
uint32_t n = input->transformed_desc->dim1;
uint32_t p = out_shape[2];
float *exp_out_values = malloc(s * m * p * sizeof(float));
float y_scale, y_offset;
gen_test_expected_fp32_array(s, m, n, p, input_values, input_weights_values,
input_biases_values, a_scale, a_offset, b_scale,
b_offset, c_scale, c_offset, exp_out_values,
&y_scale, &y_offset, op_type);
// Pre-compute bias values
const uint64_t bias_s = input_biases_layout == ZDNN_2DS ? s : 1;
const uint64_t num_elements = bias_s * p;
float *computed_biases_values = malloc(num_elements * sizeof(float));
if (op_type == MATMUL_OP_ADDITION) {
pre_compute_folded_bias(
bias_s, n, p, input_weights_values, input_biases_values,
CLEANSE_FP32(a_scale), CLEANSE_FP32(a_offset), CLEANSE_FP32(b_scale),
CLEANSE_FP32(c_scale), CLEANSE_FP32(c_offset), CLEANSE_FP32(y_scale),
CLEANSE_FP32(y_offset), computed_biases_values);
} else {
pre_compute_comparison_bias(
bias_s, n, p, input_weights_values, input_biases_values,
CLEANSE_FP32(a_scale), CLEANSE_FP32(a_offset), CLEANSE_FP32(b_scale),
CLEANSE_FP32(c_scale), CLEANSE_FP32(c_offset), CLEANSE_FP32(y_scale),
CLEANSE_FP32(y_offset), computed_biases_values);
}
biases =
alloc_ztensor_with_values(input_biases_shape, input_biases_layout, FP32,
NO_CONCAT, false, computed_biases_values);
biases->rec_scale = 1.f / c_scale;
biases->offset = c_offset;
zdnn_ztensor *out;
out = alloc_quantized_ztensor_with_values(out_shape, out_layout, FP32,
QUANTIZED_DLFLOAT16, NULL, y_scale,
y_offset);
// pre_computed=true
zdnn_status status =
zdnn_quantized_matmul_op(input, weights, biases, op_type, clip_min,
clip_max, false, false, true, NULL, out);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"call to %s() returned status %08x \"%s\" but "
"expected %08x \"%s\"",
"zdnn_quantized_matmul_op", status,
zdnn_get_status_message(status), exp_status,
zdnn_get_status_message(exp_status));
// Confirm output tensor values match expected values
if (exp_status == ZDNN_OK) {
if (op_type == MATMUL_OP_ADDITION) {
assert_quantized_ztensor_values(out, false, exp_out_values);
} else {
assert_quantized_ztensor_compare_values(out, false, exp_out_values);
}
}
memset(out->buffer, 0, out->buffer_size);
// dequantize=true
// pre_computed=true
status = zdnn_quantized_matmul_op(input, weights, biases, op_type, clip_min,
clip_max, false, true, true, NULL, out);
TEST_ASSERT_MESSAGE_FORMATTED(status == exp_status,
"call to %s() returned status %08x \"%s\" but "
"expected %08x \"%s\"",
"zdnn_quantized_matmul_op", status,
zdnn_get_status_message(status), exp_status,
zdnn_get_status_message(exp_status));
// Confirm output tensor values match expected values
if (exp_status == ZDNN_OK) {
if (op_type == MATMUL_OP_ADDITION) {
assert_dequantized_ztensor_values(out, false, exp_out_values);
} else {
assert_quantized_ztensor_compare_values(out, false, exp_out_values);
}
}
// Free expected output and computed bias values
free(exp_out_values);
free(computed_biases_values);
// Free input/output tensors
free_ztensor_buffers(4, input, weights, biases, out);
}
/**
* - Quantized MatMul BiasAdd (stacked)
*
* - Matrix input_a = s x m x n --Randomly Generated Array
* - Matrix input_b = s x n x p --Randomly Generated Array
* - Matrix bias = s x p --Randomly Generated Array
* - Matrix output = s x m x p
*/
void quantized_matmul_smn_by_snp(uint64_t s, uint64_t m, uint64_t n, uint64_t p,
zdnn_matmul_ops op_type, bool symmetric,
bool on_the_fly, bool pre_compute) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = symmetric ? 20.f : 10.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {s, p};
num_values = s * p;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
if (pre_compute) {
test_zdnn_api_quantized_matmul_pre_computed(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
} else {
test_zdnn_api_quantized_matmul(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
free(input_a_values);
free(input_b_values);
free(input_c_values);
}
/**
* - Quantized MatMul BiasAdd (bcast1)
*
* - Matrix input_a = m x n --Randomly Generated Array
* - Matrix input_b = s x n x p --Randomly Generated Array
* - Matrix bias = s x p --Randomly Generated Array
* - Matrix output = s x m x p
*/
void quantized_matmul_mn_by_snp(uint64_t s, uint64_t m, uint64_t n, uint64_t p,
zdnn_matmul_ops op_type, bool symmetric,
bool on_the_fly, bool pre_compute) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {m, n};
num_values = m * n;
float *input_a_values = malloc(s * num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// manually "broadcast" those m*n entries s times across input_a_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = num_values * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_a_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_a_values, size);
tmp_ptr += size;
}
// Setup Input B using random values
uint32_t input_b_shape[] = {s, n, p};
num_values = s * n * p;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = symmetric ? 20.f : 10.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {s, p};
num_values = s * p;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
if (pre_compute) {
test_zdnn_api_quantized_matmul_pre_computed(
input_a_shape, ZDNN_2D, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
} else {
test_zdnn_api_quantized_matmul(
input_a_shape, ZDNN_2D, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
free(input_a_values);
free(input_b_values);
free(input_c_values);
}
/**
* - Quantized MatMul BiasAdd (bcast23)
*
* - Matrix input_a = s x m x n --Randomly Generated Array
* - Matrix input_b = n x p --Randomly Generated Array
* - Matrix bias = p --Randomly Generated Array
* - Matrix output = s x m x p
*/
void quantized_matmul_smn_by_np(uint64_t s, uint64_t m, uint64_t n, uint64_t p,
zdnn_matmul_ops op_type, bool symmetric,
bool on_the_fly, bool pre_compute) {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {s, m, n};
num_values = s * m * n;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {n, p};
num_values = n * p;
float *input_b_values = malloc(s * num_values * sizeof(float));
float b_min = -20.f;
float b_max = symmetric ? 20.f : 10.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// manually "broadcast" those n*p entries s times across input_b_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
uint64_t size = num_values * sizeof(float);
uint8_t *tmp_ptr = (uint8_t *)((uintptr_t)input_b_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_b_values, size);
tmp_ptr += size;
}
// Setup Input bias using random values
uint32_t input_c_shape[] = {p};
num_values = p;
float *input_c_values = malloc(s * num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// manually "broadcast" those p entries s times across input_c_values[]
// because gen_test_expected_fp32_array() doesn't handle broadcast natively
size = num_values * sizeof(float);
tmp_ptr = (uint8_t *)((uintptr_t)input_c_values + size);
for (uint64_t i = 1; i < s; i++) {
memcpy((void *)tmp_ptr, (void *)input_c_values, size);
tmp_ptr += size;
}
// Setup Output and expected values
uint32_t output_shape[] = {s, m, p};
if (pre_compute) {
test_zdnn_api_quantized_matmul_pre_computed(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_2D, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_1D, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
} else {
test_zdnn_api_quantized_matmul(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_2D, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_1D, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
free(input_a_values);
free(input_b_values);
free(input_c_values);
}
/******************************************************************************
BiasAdd Tests
******************************************************************************/
void quantized_matmul_biasadd_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
default_weights_scale, default_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
default_weights_scale, default_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
default_weights_scale, default_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
// Quantized MatMul with symmetric weights (Zb == 0), which will fold correction
// term for input_a into bias
void quantized_matmul_biasadd_symmetric() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_symmetric_no_clipping() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
bool disable_clipping = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, disable_clipping);
}
void quantized_matmul_biasadd_bcast1_symmetric() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast1_symmetric_no_clipping() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
bool disable_clipping = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, disable_clipping);
}
void quantized_matmul_biasadd_bcast23_symmetric() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast23_symmetric_no_clipping() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
bool disable_clipping = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, disable_clipping);
}
// Quantized MatMul with unquantized input, which will quantize the input on the
// fly
void quantized_matmul_biasadd_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
default_weights_scale, default_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast1_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
default_weights_scale, default_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_bcast23_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
default_weights_scale, default_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_biasadd_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_symmetric_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_symmetric_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_symmetric_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_biasadd_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
/******************************************************************************
Compare Tests
******************************************************************************/
void quantized_matmul_greater_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_not_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_not_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_not_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_lesser_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_greater_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_not_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_not_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_not_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_on_the_fly_lesser_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK, default_disable_clipping);
}
void quantized_matmul_greater_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_greater_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_not_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_lesser_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = false;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
/******************************************************************************
Pre-Computed BiasAdd Tests
******************************************************************************/
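// The tests in this group drive test_zdnn_api_quantized_matmul_pre_computed()
// and the quantized_matmul_*_by_* helpers with pre_compute set to true,
// covering the default shapes, the bcast1/bcast23 broadcast variants, and the
// on-the-fly (unquantized float input) path for the bias-add operation.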
void quantized_matmul_pre_comp_biasadd() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_biasadd_bcast1() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_biasadd_bcast23() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
// Quantized MatMul with unquantized input, which will quantize the input on
// the fly (see the illustrative quantization sketch following this function).
void quantized_matmul_pre_comp_biasadd_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
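// Illustrative sketch only (hypothetical helper, not part of the zDNN API or
// this test suite): when on_the_fly is true the tests pass float input values
// plus a (scale, offset) pair and let the library quantize the input. A
// typical affine int8 quantization step looks like the function below;
// zDNN's exact rounding and saturation behavior may differ.
static int8_t example_quantize_int8(float x, float scale, float offset) {
  // q = round(x / scale + offset), saturated to the [INT8_MIN, INT8_MAX]
  // clip range used by these tests.
  float q = x / scale + offset;
  q = (q >= 0.f) ? (float)(int)(q + 0.5f) : (float)(int)(q - 0.5f);
  if (q < (float)INT8_MIN) {
    q = (float)INT8_MIN;
  }
  if (q > (float)INT8_MAX) {
    q = (float)INT8_MAX;
  }
  return (int8_t)q;
}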
void quantized_matmul_pre_comp_biasadd_bcast1_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_biasadd_bcast23_on_the_fly() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_biasadd_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_biasadd_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_biasadd_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_biasadd_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_biasadd_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_biasadd_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
/******************************************************************************
Pre-Computed Compare Tests
******************************************************************************/
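// Same pre-computed path as above, exercised with the comparison operations
// (GREATER, GREATER_EQUAL, EQUAL, NOT_EQUAL, LESSER_EQUAL, LESSER) instead of
// bias addition.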
void quantized_matmul_pre_comp_greater_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_not_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_not_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_not_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_lesser_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = false;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_greater_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_not_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_not_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_not_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_equal_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_equal_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_equal_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_bcast1_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
bcast_input_shape, ZDNN_2D, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
default_weights_shape, ZDNN_3DS, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
default_biases_shape, ZDNN_2DS, default_biases_values,
default_biases_scale, default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_on_the_fly_lesser_bcast23_basic() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool on_the_fly = true;
test_zdnn_api_quantized_matmul_pre_computed(
default_input_shape, ZDNN_3DS, default_input_values, default_input_scale,
default_input_offset, INT8_MIN, INT8_MAX,
bcast_weights_shape, ZDNN_2D, default_weights_values,
symmetric_weights_scale, symmetric_weights_offset,
bcast_biases_shape, ZDNN_1D, default_biases_values, default_biases_scale,
default_biases_offset,
default_output_shape, ZDNN_3DS,
op_type, on_the_fly, ZDNN_OK);
}
void quantized_matmul_pre_comp_greater_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = false;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_greater_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_GREATER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_not_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_NOT_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_equal_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER_EQUAL;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_on_the_fly_2x20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_on_the_fly_20x40_by_2x40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_mn_by_snp(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_pre_comp_lesser_on_the_fly_2x20x40_by_40x30() {
zdnn_matmul_ops op_type = MATMUL_OP_LESSER;
bool symmetric = true;
bool on_the_fly = true;
bool pre_compute = true;
quantized_matmul_smn_by_np(2, 20, 40, 30, op_type, symmetric, on_the_fly,
pre_compute);
}
void quantized_matmul_invalid_op() {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {2, 20, 40};
num_values = 2 * 20 * 40;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {2, 40, 30};
num_values = 2 * 40 * 30;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = 20.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {2, 30};
num_values = 2 * 30;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {2, 20, 30};
// Manually set invalid op_type
zdnn_matmul_ops op_type = 7;
test_zdnn_api_quantized_matmul(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, true, ZDNN_FUNC_RC_F000, default_disable_clipping);
free(input_a_values);
free(input_b_values);
free(input_c_values);
}
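// Illustrative sketch only (hypothetical helper, not the actual
// gen_scale_and_offset() implementation): the tests above derive a
// quantization (scale, offset) pair from a float min/max range. One common
// asymmetric derivation maps [min, max] onto the int8 range as shown below.
static void example_scale_and_offset(float min, float max, float *scale,
                                     float *offset) {
  // scale is the step size between representable values; offset is the zero
  // point such that dequantize(q) = (q - offset) * scale recovers min at
  // INT8_MIN and max at INT8_MAX. Assumes max > min.
  *scale = (max - min) / ((float)INT8_MAX - (float)INT8_MIN);
  *offset = (float)INT8_MIN - min / *scale;
}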
void quantized_matmul_invalid_format() {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {2, 20, 40};
num_values = 2 * 20 * 40;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {2, 40, 30};
num_values = 2 * 40 * 30;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = 20.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {2, 30};
num_values = 2 * 30;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {2, 20, 30};
num_values = 2 * 20 * 30;
float *exp_out_values = malloc(num_values * sizeof(float));
float y_scale, y_offset;
gen_test_expected_fp32_array(2, 20, 40, 30, input_a_values, input_b_values,
input_c_values, a_scale, a_offset, b_scale,
b_offset, c_scale, c_offset, exp_out_values,
&y_scale, &y_offset, MATMUL_OP_ADDITION);
// Setup ztensors
zdnn_ztensor *input, *weights, *biases, *out;
// Manually set invalid format for input
input = alloc_quantized_ztensor_with_values(
input_a_shape, ZDNN_3DS, INT8, QUANTIZED_WEIGHTS_INT8, input_a_values,
a_scale, a_offset);
weights = alloc_quantized_ztensor_with_values(
input_b_shape, ZDNN_3DS, INT8, QUANTIZED_WEIGHTS_INT8, input_b_values,
b_scale, b_offset);
biases = alloc_quantized_ztensor_with_values(input_c_shape, ZDNN_2DS, FP32,
QUANTIZED_INT8, input_c_values,
c_scale, c_offset);
out = alloc_quantized_ztensor_with_values(output_shape, ZDNN_3DS, FP32,
QUANTIZED_DLFLOAT16, NULL, y_scale,
y_offset);
// dequantize=true
zdnn_status status = zdnn_quantized_matmul_op(
input, weights, biases, MATMUL_OP_ADDITION, INT8_MIN, INT8_MAX,
default_disable_clipping, true, false, NULL, out);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_FUNC_RC_F001,
"call to zdnn_quantized_matmul_op() returned "
"status %08x \"%s\" but expected %08x \"%s\"",
status, zdnn_get_status_message(status),
ZDNN_FUNC_RC_F001,
zdnn_get_status_message(ZDNN_FUNC_RC_F001));
// Free ztensors
free_ztensor_buffers(4, input, weights, biases, out);
// Free data buffers
free(input_a_values);
free(input_b_values);
free(input_c_values);
free(exp_out_values);
}
void quantized_matmul_invalid_M() {
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {2, 20, 40};
num_values = 2 * 20 * 40;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {2, 40, 30};
num_values = 2 * 40 * 30;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = 20.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {2, 30};
num_values = 2 * 30;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {2, 20, 30};
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
// Manually set invalid scale, which will cause invalid M value to be
// computed
b_scale = 0;
test_zdnn_api_quantized_matmul(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
op_type, true, ZDNN_FUNC_RC_F002, default_disable_clipping);
free(input_a_values);
free(input_b_values);
free(input_c_values);
}
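
/*
 * Editorial sketch (assumption, not the zDNN implementation): under the
 * usual requantization convention the combined multiplier applied to the
 * integer accumulator is M = (Sa * Sb) / Sy, so forcing b_scale (Sb) to 0
 * above degenerates M, which is why ZDNN_FUNC_RC_F002 (invalid M) is the
 * expected status. The helper name below is hypothetical.
 */
static inline float hypothetical_combined_multiplier(float Sa, float Sb,
                                                     float Sy) {
  return (Sa * Sb) / Sy; // collapses to 0 when Sb == 0
}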
void quantized_matmul_pre_comp_invalid_Zb() {
zdnn_matmul_ops op_type = MATMUL_OP_ADDITION;
bool symmetric = false; // force zB != 0.f
bool on_the_fly = false;
uint64_t num_values = 0;
// Setup Input A using random values
uint32_t input_a_shape[] = {2, 20, 40};
num_values = 2 * 20 * 40;
float *input_a_values = malloc(num_values * sizeof(float));
float a_min = -100.f;
float a_max = 80.f;
gen_random_float_array_range(num_values, input_a_values, a_min, a_max);
float a_scale, a_offset;
gen_scale_and_offset(a_min, a_max, &a_scale, &a_offset);
// Setup Input B using random values
uint32_t input_b_shape[] = {2, 40, 30};
num_values = 2 * 40 * 30;
float *input_b_values = malloc(num_values * sizeof(float));
float b_min = -20.f;
float b_max = symmetric ? 20.f : 10.f;
gen_random_float_array_range(num_values, input_b_values, b_min, b_max);
float b_scale, b_offset;
gen_scale_and_offset(b_min, b_max, &b_scale, &b_offset);
// Setup Input bias using random values
uint32_t input_c_shape[] = {2, 30};
num_values = 2 * 30;
float *input_c_values = malloc(num_values * sizeof(float));
float c_min = -500.f;
float c_max = 500.f;
gen_random_float_array_range(num_values, input_c_values, c_min, c_max);
float c_scale, c_offset;
gen_scale_and_offset(c_min, c_max, &c_scale, &c_offset);
// Setup Output and expected values
uint32_t output_shape[] = {2, 20, 30};
test_zdnn_api_quantized_matmul_pre_computed(
input_a_shape, ZDNN_3DS, input_a_values, a_scale, a_offset, INT8_MIN,
INT8_MAX,
input_b_shape, ZDNN_3DS, input_b_values, b_scale, b_offset,
input_c_shape, ZDNN_2DS, input_c_values, c_scale, c_offset,
output_shape, ZDNN_3DS,
      op_type, on_the_fly, ZDNN_INVALID_OFFSET);
  free(input_a_values);
  free(input_b_values);
  free(input_c_values);
}
int main() {
UNITY_BEGIN();
/*
* Quantized Bias Tests
*/
// BiasAdd tests
RUN_TEST(quantized_matmul_biasadd_basic);
RUN_TEST(quantized_matmul_biasadd_bcast1_basic);
RUN_TEST(quantized_matmul_biasadd_bcast23_basic);
// Symmetric weights test
RUN_TEST(quantized_matmul_biasadd_symmetric);
RUN_TEST(quantized_matmul_biasadd_bcast1_symmetric);
RUN_TEST(quantized_matmul_biasadd_bcast23_symmetric);
// Symmetric weights test - no clipping
RUN_TEST(quantized_matmul_biasadd_symmetric_no_clipping);
RUN_TEST(quantized_matmul_biasadd_bcast1_symmetric_no_clipping);
RUN_TEST(quantized_matmul_biasadd_bcast23_symmetric_no_clipping);
// Quantize on the fly tests
RUN_TEST(quantized_matmul_biasadd_on_the_fly);
RUN_TEST(quantized_matmul_biasadd_bcast1_on_the_fly);
RUN_TEST(quantized_matmul_biasadd_bcast23_on_the_fly);
// BiasAdd tests (random)
RUN_TEST(quantized_matmul_biasadd_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_2x20x40_by_40x30);
// Symmetric weights test (random)
RUN_TEST(quantized_matmul_biasadd_symmetric_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_symmetric_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_symmetric_2x20x40_by_40x30);
// Quantize on the fly tests (random)
RUN_TEST(quantized_matmul_biasadd_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_biasadd_on_the_fly_2x20x40_by_40x30);
// Compare tests (always symmetric weights)
RUN_TEST(quantized_matmul_greater_basic);
RUN_TEST(quantized_matmul_greater_bcast1_basic);
RUN_TEST(quantized_matmul_greater_bcast23_basic);
RUN_TEST(quantized_matmul_greater_equal_basic);
RUN_TEST(quantized_matmul_greater_equal_bcast1_basic);
RUN_TEST(quantized_matmul_greater_equal_bcast23_basic);
RUN_TEST(quantized_matmul_equal_basic);
RUN_TEST(quantized_matmul_equal_bcast1_basic);
RUN_TEST(quantized_matmul_equal_bcast23_basic);
RUN_TEST(quantized_matmul_not_equal_basic);
RUN_TEST(quantized_matmul_not_equal_bcast1_basic);
RUN_TEST(quantized_matmul_not_equal_bcast23_basic);
RUN_TEST(quantized_matmul_lesser_equal_basic);
RUN_TEST(quantized_matmul_lesser_equal_bcast1_basic);
RUN_TEST(quantized_matmul_lesser_equal_bcast23_basic);
RUN_TEST(quantized_matmul_lesser_basic);
RUN_TEST(quantized_matmul_lesser_bcast1_basic);
RUN_TEST(quantized_matmul_lesser_bcast23_basic);
// Compare quantized on the fly tests (always symmetric weights)
RUN_TEST(quantized_matmul_on_the_fly_greater_basic);
RUN_TEST(quantized_matmul_on_the_fly_greater_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_greater_bcast23_basic);
RUN_TEST(quantized_matmul_on_the_fly_greater_equal_basic);
RUN_TEST(quantized_matmul_on_the_fly_greater_equal_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_greater_equal_bcast23_basic);
RUN_TEST(quantized_matmul_on_the_fly_equal_basic);
RUN_TEST(quantized_matmul_on_the_fly_equal_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_equal_bcast23_basic);
RUN_TEST(quantized_matmul_on_the_fly_not_equal_basic);
RUN_TEST(quantized_matmul_on_the_fly_not_equal_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_not_equal_bcast23_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_equal_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_equal_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_equal_bcast23_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_bcast1_basic);
RUN_TEST(quantized_matmul_on_the_fly_lesser_bcast23_basic);
// Compare tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_greater_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_greater_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_not_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_not_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_not_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_lesser_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_lesser_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_2x20x40_by_40x30);
// Compare quantized on the fly tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_greater_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_greater_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_greater_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_not_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_not_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_not_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_lesser_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_lesser_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_lesser_on_the_fly_2x20x40_by_40x30);
/*
* Pre-Computed Bias Tests
*/
// BiasAdd test (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_biasadd);
RUN_TEST(quantized_matmul_pre_comp_biasadd_bcast1);
RUN_TEST(quantized_matmul_pre_comp_biasadd_bcast23);
// BiasAdd quantized on the fly tests (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_biasadd_on_the_fly);
RUN_TEST(quantized_matmul_pre_comp_biasadd_bcast1_on_the_fly);
RUN_TEST(quantized_matmul_pre_comp_biasadd_bcast23_on_the_fly);
// BiasAdd tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_biasadd_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_biasadd_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_biasadd_2x20x40_by_40x30);
// BiasAdd quantized on the fly tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_biasadd_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_biasadd_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_biasadd_on_the_fly_2x20x40_by_40x30);
// Compare tests (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_greater_basic);
RUN_TEST(quantized_matmul_pre_comp_greater_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_greater_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_not_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_not_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_not_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_lesser_bcast23_basic);
// Compare quantized on the fly tests (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_greater_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_not_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_not_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_not_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_equal_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_equal_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_equal_bcast23_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_bcast1_basic);
RUN_TEST(quantized_matmul_pre_comp_on_the_fly_lesser_bcast23_basic);
// Compare tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_greater_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_2x20x40_by_40x30);
// Compare quantized on the fly tests (random) (always symmetric weights)
RUN_TEST(quantized_matmul_pre_comp_greater_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_on_the_fly_2x20x40_by_40x30);
RUN_TEST(
quantized_matmul_pre_comp_greater_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_greater_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_not_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(
quantized_matmul_pre_comp_lesser_equal_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_equal_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_on_the_fly_2x20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_on_the_fly_20x40_by_2x40x30);
RUN_TEST(quantized_matmul_pre_comp_lesser_on_the_fly_2x20x40_by_40x30);
RUN_TEST(quantized_matmul_invalid_op);
RUN_TEST(quantized_matmul_invalid_format);
RUN_TEST(quantized_matmul_invalid_M);
RUN_TEST(quantized_matmul_pre_comp_invalid_Zb);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_reduce.c 0000664 0000000 0000000 00000023630 15000221702 0020336 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/*
* Simple test to drive a full reduce api.
*/
void zdnn_reduce_val_test(uint32_t *in_dims, zdnn_data_layouts layout,
float *input, uint32_t *out_dims,
zdnn_reduce_ops op_type, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
in_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor =
alloc_output_ztensor(out_dims, layout, test_datatype, NO_CONCAT);
/*
* Begin Testing!
*/
zdnn_status status =
zdnn_reduce(input_ztensor, NULL, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(status == expected_status,
"call to zdnn_reduce() with op_type %d "
"returned status %08x but expected %08x\n",
op_type, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
zdnn_reset_ztensor(output_ztensor);
void *self_workarea = malloc_aligned_4k(ZDNN_8K_SAVEAREA_SIZE);
TEST_ASSERT_MESSAGE_FORMATTED(
      self_workarea, "%s() - can't allocate reduce work area\n", __func__);
status = zdnn_reduce(input_ztensor, self_workarea, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_reduce() with op_type %d and provided "
"work_area returned status %08x but expected %08x\n",
op_type, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
free_aligned_4k(self_workarea);
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
void zdnn_reduce_idx_test(uint32_t *in_dims, zdnn_data_layouts layout,
float *input, uint32_t *out_dims,
zdnn_reduce_ops op_type, zdnn_status expected_status,
uint32_t *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
in_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor =
alloc_output_ztensor(out_dims, layout, INT32, NO_CONCAT);
/*
* Begin Testing!
*/
zdnn_status status =
zdnn_reduce(input_ztensor, NULL, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(status == expected_status,
"call to zdnn_reduce() with op_type %d "
"returned status %08x but expected %08x\n",
op_type, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
zdnn_reset_ztensor(output_ztensor);
void *self_workarea = malloc_aligned_4k(ZDNN_8K_SAVEAREA_SIZE);
TEST_ASSERT_MESSAGE_FORMATTED(
      self_workarea, "%s() - can't allocate reduce work area\n", __func__);
status = zdnn_reduce(input_ztensor, self_workarea, op_type, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_reduce() with op_type %d and provided "
"work_area returned status %08x but expected %08x\n",
op_type, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
free_aligned_4k(self_workarea);
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
void api_reduce_basic_min() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,1)
[[
[[3], [6]],
[[8], [3]]
]]
*/
uint32_t out_shape[] = {1, 2, 2, 1};
float expected_values[] = {3, 6, 8, 3};
zdnn_reduce_val_test(in_shape, ZDNN_NHWC, input_values, out_shape,
REDUCE_OP_MINIMUM, ZDNN_OK, expected_values);
}
void api_reduce_nchw_min() {
/* Input values as NCHW sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 8], [30, 80]],
[[6, 3], [60, 10]]
]]
*/
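  /*
   * Editorial note: the two views above describe the same flat buffer indexed
   * two ways. A value at NCHW index (n,c,h,w) sits at NHWC index (n,h,w,c);
   * e.g. the value 8 at flat index 4 is NCHW (0,1,0,0), i.e. NHWC (0,0,0,1),
   * which is why the true-NHWC view pairs 3 and 8 along the innermost (C)
   * dimension.
   */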
// Values in NCHW order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NCHW sized (1,1,2,2)
[[
[[3, 30], [3, 10]]
]]
*/
uint32_t out_shape[] = {1, 1, 2, 2};
float expected_values[] = {3, 30, 3, 10};
zdnn_reduce_val_test(in_shape, ZDNN_NCHW, input_values, out_shape,
REDUCE_OP_MINIMUM, ZDNN_OK, expected_values);
}
void api_reduce_basic_min_idx() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,1)
[[
[[0], [0]],
[[0], [0]]
]]
*/
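  /*
   * Editorial note: the *_IDX reduce ops return the position of the
   * minimum/maximum along the innermost dimension; every innermost pair above
   * ({3,30}, {6,60}, {8,80}, {3,10}) has its minimum in slot 0, hence the
   * all-zero expected values.
   */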
uint32_t out_shape[] = {1, 2, 2, 1};
uint32_t expected_values[] = {0, 0, 0, 0};
zdnn_reduce_idx_test(in_shape, ZDNN_NHWC, input_values, out_shape,
REDUCE_OP_MINIMUM_IDX, ZDNN_OK, expected_values);
}
void api_reduce_nchw_min_idx() {
/* Input values as NCHW sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 8], [30, 80]],
[[6, 3], [60, 10]]
]]
*/
// Values in ZDNN_NCHW order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NCHW sized (1,1,2,2)
[[
[[0, 0]], [[1, 1]]
]]
*/
uint32_t out_shape[] = {1, 1, 2, 2};
uint32_t expected_values[] = {0, 0, 1, 1};
zdnn_reduce_idx_test(in_shape, ZDNN_NCHW, input_values, out_shape,
REDUCE_OP_MINIMUM_IDX, ZDNN_OK, expected_values);
}
void api_reduce_basic_max() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,1)
[[
[[30], [60]],
[[80], [10]]
]]
*/
uint32_t out_shape[] = {1, 2, 2, 1};
float expected_values[] = {30, 60, 80, 10};
zdnn_reduce_val_test(in_shape, ZDNN_NHWC, input_values, out_shape,
REDUCE_OP_MAXIMUM, ZDNN_OK, expected_values);
}
void api_reduce_nchw_max() {
/* Input values as NCHW sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 8], [30, 80]],
[[6, 3], [60, 10]]
]]
*/
// Values in ZDNN_NCHW order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NCHW sized (1,1,2,2)
[[
[[8, 80]], [[6, 60]]
]]
*/
uint32_t out_shape[] = {1, 1, 2, 2};
float expected_values[] = {8, 80, 6, 60};
zdnn_reduce_val_test(in_shape, ZDNN_NCHW, input_values, out_shape,
REDUCE_OP_MAXIMUM, ZDNN_OK, expected_values);
}
void api_reduce_basic_max_idx() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,1)
[[
[[1], [1]],
[[1], [1]]
]]
*/
uint32_t out_shape[] = {1, 2, 2, 1};
uint32_t expected_values[] = {1, 1, 1, 1};
zdnn_reduce_idx_test(in_shape, ZDNN_NHWC, input_values, out_shape,
REDUCE_OP_MAXIMUM_IDX, ZDNN_OK, expected_values);
}
void api_reduce_nchw_max_idx() {
/* Input values as NCHW sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 8], [30, 80]],
[[6, 3], [60, 10]]
]]
*/
// Values in ZDNN_NCHW order
uint32_t in_shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NCHW sized (1,1,2,2)
[[
[[1, 1]], [[0, 0]]
]]
*/
uint32_t out_shape[] = {1, 1, 2, 2};
uint32_t expected_values[] = {1, 1, 0, 0};
zdnn_reduce_idx_test(in_shape, ZDNN_NCHW, input_values, out_shape,
REDUCE_OP_MAXIMUM_IDX, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_basic_min);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_nchw_min);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_basic_min_idx);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_nchw_min_idx);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_basic_max);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_nchw_max);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_basic_max_idx);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_reduce_nchw_max_idx);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_relu.c 0000664 0000000 0000000 00000027510 15000221702 0020037 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
// -----------------------------------------------------------------------------
// ReLU Unit Testing, for convenience, recall the following:
// relu(x) -> if (x > 0) return x; else return 0;
// -----------------------------------------------------------------------------
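
/*
 * Editorial sketch (not part of the original driver, which relies on
 * hand-computed expected values): a host-side reference for the clipped ReLU
 * that zdnn_relu() is expected to compute. The helper name act_relu_clipped
 * is hypothetical; a NULL or zero clipping value means "no clipping",
 * matching how the tests below pass their clip_value.
 */
static inline void act_relu_clipped(const float input[], float output[],
                                    int num_elems, const float *clip) {
  for (int i = 0; i < num_elems; i++) {
    float v = (input[i] > 0) ? input[i] : 0;
    if (clip && *clip != 0 && v > *clip)
      v = *clip;
    output[i] = v;
  }
}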
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/**
* zdnn_relu_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_relu_test(uint32_t *io_dims, zdnn_data_layouts layout, float *input,
float *clipping_value, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_relu(input_ztensor, clipping_value, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
      status == expected_status,
      "call to zdnn_relu() returned status %08x but expected %08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
-------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_basic_nhwc_basic
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*/
void zdnn_relu_basic_nhwc_basic() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_expected_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float clip_value = 0;
zdnn_relu_test(shape, ZDNN_NHWC, input_expected_values, &clip_value, ZDNN_OK,
input_expected_values);
}
/**
* zdnn_relu_basic_nhwc_basic_clip6
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[6], [6], [6]]
* ]]
*/
void zdnn_relu_basic_nhwc_basic_clip6() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
  float input_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  float expected_values[] = {1, 2, 3, 4, 5, 6, 6, 6, 6};
  float clip_value = 6;
  zdnn_relu_test(shape, ZDNN_NHWC, input_values, &clip_value, ZDNN_OK,
                 expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Basic
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_deadneuron_3d_basic
*
* Simple test of all negative input values
* Expect a dead neuron
*
* Input values as NWC sized (3,3,2):
* [[
* [[-1, -10], [-2, -20], [-3, -30]],
* [[-4, -40], [-5, -50], [-6, -60]],
* [[-7, -70], [-8, -80], [-9, -90]]
* ]]
*
* Expected Output values as NWC sized (3,3,2):
* [[
* [[0, 0], [0, 0], [0, 0]],
* [[0, 0], [0, 0], [0, 0]],
* [[0, 0], [0, 0], [0, 0]]
* ]]
*/
void zdnn_relu_deadneuron_3d_basic() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {-1, -10, -2, -20, -3, -30, -4, -40, -5,
-50, -6, -60, -7, -70, -8, -80, -9, -90};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_relu_test(shape, ZDNN_3D, input_values, NULL, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_balance_nhwc_basic
*
* Simple test of half positive and half negative input values
* Expect 50% zeroed 50% valued
*
* Input values as NHWC
* [[
* [[10, -10], [20, -20], [30, -30]],
* [[40, -40], [50, -50], [60, -60]],
* [[70, -70], [80, -80], [90, -90]],
* ]]
*
* Expected Output values as NHWC
* [[
* [[10, 0], [20, 0], [30, 0]],
* [[40, 0], [50, 0], [60, 0]],
* [[70, 0], [80, 0], [90, 0]],
* ]]
*/
void zdnn_relu_balance_nhwc_basic() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, 0, 20, 0, 30, 0, 40, 0, 50,
0, 60, 0, 70, 0, 80, 0, 90, 0};
zdnn_relu_test(shape, ZDNN_NHWC, input_values, NULL, ZDNN_OK,
expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_balance_nhwc_basic_clip50
*
* Simple test of half positive and half negative input values
* Expect 50% zeroed 50% valued
*
* Input values as NHWC
* [[
* [[10, -10], [20, -20], [30, -30]],
* [[40, -40], [50, -50], [60, -60]],
* [[70, -70], [80, -80], [90, -90]],
* ]]
*
* Expected Output values as NHWC
* [[
* [[10, 0], [20, 0], [30, 0]],
* [[40, 0], [50, 0], [50, 0]],
* [[50, 0], [50, 0], [50, 0]],
* ]]
*/
void zdnn_relu_balance_nhwc_basic_clip50() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {10, -10, 20, -20, 30, -30, 40, -40, 50,
-50, 60, -60, 70, -70, 80, -80, 90, -90};
float expected_values[] = {10, 0, 20, 0, 30, 0, 40, 0, 50,
0, 50, 0, 50, 0, 50, 0, 50, 0};
float clip_value = 50;
zdnn_relu_test(shape, ZDNN_NHWC, input_values, &clip_value, ZDNN_OK,
expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Large
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_basic_nhwc_large
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as NHWC
* [[
* [[65000, 65100, 65200], [64000, 64100, 64200], [63000, 63100, 63200]],
* [[62000, 62100, 62200], [61000, 61100, 61200], [60000, 60100, 60200]],
* [[59000, 59100, 59200], [58000, 58100, 58200], [57000, 57100, 57200]]
* ]]
*
* Expected Output values as NHWC
* [[
* [[65000, 65100, 65200], [64000, 64100, 64200], [63000, 63100, 63200]],
* [[62000, 62100, 62200], [61000, 61100, 61200], [60000, 60100, 60200]],
* [[59000, 59100, 59200], [58000, 58100, 58200], [57000, 57100, 57200]]
* ]]
*
*/
void zdnn_relu_basic_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
float input_expected_values[] = {
65000, 65100, 65200, 64000, 64100, 64200, 63000, 63100, 63200,
62000, 62100, 62200, 61000, 61100, 61200, 60000, 60100, 60200,
59000, 59100, 59200, 58000, 58100, 58200, 57000, 57100, 57200};
zdnn_relu_test(shape, ZDNN_NHWC, input_expected_values, NULL, ZDNN_OK,
input_expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Large
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_deadneuron_3d_large
*
* Simple test of all negative input values
* Expect a dead neuron
*
* Generate a test that is of size 8x8x8
* and use automatic float generator to create
* input values.
*
 * Output will contain a tensor of size 8x8x8
 * with all zeros.
*/
void zdnn_relu_deadneuron_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {8, 8, 8}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
gen_float_array_zeros(num_io_buffer_values, expected_values);
zdnn_relu_test(shape, ZDNN_3D, input_values, NULL, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
ReLU Large
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_relu_balance_nhwc_large
*
* Simple test of half positive and half negative input values
* Expect 50% zeroed 50% valued
*
 * Generate a test that is of size 1x10x25x50
 * and use the automatic float generator to create
 * input values.
 *
 * Output will contain a tensor of size 1x10x25x50
 * with 50% zeros and 50% values.
*
*
*/
void zdnn_relu_balance_nhwc_large() {
// Initialize the dimensions for our input tensor
uint32_t shape[] = {1, 10, 25, 50}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
copy_to_array(num_io_buffer_values, input_values, expected_values);
fill_everyother_with_zero_float_array(num_io_buffer_values, expected_values);
zdnn_relu_test(shape, ZDNN_NHWC, input_values, NULL, ZDNN_OK,
expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_basic_nhwc_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_basic_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_deadneuron_3d_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_balance_nhwc_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_deadneuron_3d_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_balance_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_basic_nhwc_basic_clip6);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_relu_balance_nhwc_basic_clip50);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_sigmoid.c 0000664 0000000 0000000 00000024337 15000221702 0020527 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
#include <math.h>
// -----------------------------------------------------------------------------
// Sigmoid Unit Testing, for convenience, recall the following:
// sigmoid(x) -> [0,1]
// For some value x, we squash that value to some real-valued number within
// range [0,1].
// For the behind the scenes:
// sigmoid(x) -> ( 1 / (1 + e(-x)) )
// https://mathworld.wolfram.com/SigmoidFunction.html
// -----------------------------------------------------------------------------
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/**
* Helper function to compute output tensor values using activation
* sigmoid
*/
void act_sigmoid(const float input[], float output[], int num_elems) {
for (long i = 0; i < num_elems; i++) {
output[i] = 1 / (1 + exp(-input[i]));
}
}
/**
* zdnn_sigmoid_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_sigmoid_test(uint32_t *shape, zdnn_data_layouts layout,
float *input_values, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, input_values);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_sigmoid(input_ztensor, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
      status == expected_status,
      "call to zdnn_sigmoid() returned status %08x but expected %08x\n",
status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
-------------------------------------------------------------------------------
Sigmoid Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_basic_nhwc
*
 * Simple test to demonstrate sigmoid
*
* Input values as NHWC sized (1,3,3,1):
* [[
* [[0], [1], [2]],
* [[3], [4], [5]],
* [[6], [7], [8]]
* ]]
*
* Expected Output values as NHWC sized (1,3,3,1):
* [[
* [[0.5], [0.7310585786], [0.880797078]],
* [[0.9525741268], [0.98201379], [0.9933071491]],
* [[0.9975273768], [0.9990889488], [0.9996646499]
* ]]
*
*/
void zdnn_sigmoid_basic_nhwc() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
float expected_values[] = {
0.5, 0.7310585786, 0.880797078, 0.9525741268, 0.98201379,
0.9933071491, 0.9975273768, 0.9990889488, 0.9996646499,
};
zdnn_sigmoid_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Sigmoid Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_balanced_nhwc
*
* Balanced (pos and neg inputs) test to demonstrate sigmoid
*
*
* Input values as NHWC sized (1,3,3,2):
* [[
* [[-1, 1], [-2, 2], [-3, 3]],
* [[-4, 4], [-5, 5], [-6, 6]],
* [[-7, 7], [-8, 8], [-9, 9]],
* ]]
*
* Expected Output values as NHWC sized 1,3,3,2:
* [[
* [[0.2689414214, 0.7310585786], [0.119202922 , 0.880797078], [0.0474258732,
* 0.9525741268]],
* [[0.01798621, 0.98201379], [0.0066928509, 0.9933071491],[0.0024726232,
* 0.9975273768]],
* [[0.0009110512, 0.9990889488], [0.0003353501, 0.9996646499],[0.0001233946,
* 0.9998766054]],
* ]]
*/
void zdnn_sigmoid_balanced_nhwc() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 2}; // Will be same for in and out dim.
float input_values[] = {-1, 1, -2, 2, -3, 3, -4, 4, -5,
5, -6, 6, -7, 7, -8, 8, -9, 9};
float expected_values[] = {
0.2689414214, 0.7310585786, 0.119202922, 0.880797078, 0.0474258732,
0.9525741268, 0.01798621, 0.98201379, 0.0066928509, 0.9933071491,
0.0024726232, 0.9975273768, 0.0009110512, 0.9990889488, 0.0003353501,
0.9996646499, 0.0001233946, 0.9998766054,
};
zdnn_sigmoid_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Sigmoid Basic
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_negative_3d
*
 * Simple test to demonstrate sigmoid on negative input values
*
* Input values as NWC sized (1,2,4):
* [[
* [[-1, -2, -3, -4], [-5, -6, -7, -8]],
* ]]
*
* Expected Output values as NWC sized (1,2,4):
* [[
* [[0.2689414214, 0.119202922, 0.0474258732, 0.01798621],
* [0.0066928509, 0.0024726232, 0.0009110512 , 0.0003353501]],
* ]]
*/
void zdnn_sigmoid_negative_3d() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 2, 4}; // Will be same for in and out dim.
float input_values[] = {-1, -2, -3, -4, -5, -6, -7, -8};
float expected_values[] = {
0.2689414214, 0.119202922, 0.0474258732, 0.01798621,
0.0066928509, 0.0024726232, 0.0009110512, 0.0003353501,
};
zdnn_sigmoid_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Sigmoid Large
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_basic_nhwc_large
*
* Simple test of all positive input values
*
 * Input values as NHWC sized (1,3,3,3):
* [[
* [[65000, 65100, 65200], [64000, 64100, 64200], [63000, 63100, 63200]],
* [[62000, 62100, 62200], [61000, 61100, 61200], [60000, 60100, 60200]],
* [[59000, 59100, 59200], [58000, 58100, 58200], [57000, 57100, 57200]]
* ]]
*
 * Expected Output values as NHWC sized (1,3,3,3), computed with act_sigmoid().
*
*/
void zdnn_sigmoid_basic_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[] = {65000, 65100, 65200, 64000, 64100, 64200, 63000,
63100, 63200, 62000, 62100, 62200, 61000, 61100,
61200, 60000, 60100, 60200, 59000, 59100, 59200,
58000, 58100, 58200, 57000, 57100, 57200};
float expected_values[num_io_buffer_values];
act_sigmoid(input_values, expected_values, num_io_buffer_values);
zdnn_sigmoid_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Sigmoid Large
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_balanced_nhwc_large
*
* Simple test of half positive and half negative input values
*
 * Generate a test that is of size 1x4x20x12
 * and use the automatic float generator to create
 * input values.
 *
 * Output will contain a tensor of size 1x4x20x12
*/
void zdnn_sigmoid_balanced_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 4, 20, 12}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_sigmoid(input_values, expected_values, num_io_buffer_values);
zdnn_sigmoid_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
Sigmoid Large
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_sigmoid_negative_3d_large
*
* Simple test of all negative input values
*
 * Generate a test that is of size 10x6x22
 * and use the automatic float generator to create
 * input values.
 *
 * Output will contain a tensor of size 10x6x22
*/
void zdnn_sigmoid_negative_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {10, 6, 22}; // Will be same for in and out dim.
int num_io_buffer_values = shape[0] * shape[1] * shape[2];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_sigmoid(input_values, expected_values, num_io_buffer_values);
zdnn_sigmoid_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_basic_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_basic_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_balanced_nhwc);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_negative_3d);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_balanced_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_sigmoid_negative_3d_large);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_softmax.c 0000664 0000000 0000000 00000042771 15000221702 0020557 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
// -----------------------------------------------------------------------------
// Softmax Unit Testing, for convenience, recall the following:
// softmax(x) -> [0,1]
// For some value x, we squash that value to some real-valued number within
// range [0,1]; all components add up to one, mainly so that they can be
// interpreted as probabilities.
// Behind the scenes:
//   softmax(x_i) = e^(x_i) / (e^(x_1) + e^(x_2) + ... + e^(x_(n-1)) + e^(x_n))
// https://en.wikipedia.org/wiki/Softmax_function
// -----------------------------------------------------------------------------
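
/*
 * Editorial sketch (not part of the original driver, which carries
 * hand-computed expected values): a host-side softmax reference over the
 * innermost dimension, with an optional log step for SOFTMAX_ACT_LOG. The
 * helper name act_softmax_innermost is hypothetical; <math.h> is included
 * here only for this sketch.
 */
#include <math.h>
static inline void act_softmax_innermost(const float input[], float output[],
                                         int num_rows, int row_len,
                                         int log_act) {
  for (int r = 0; r < num_rows; r++) {
    const float *in = &input[r * row_len];
    float *out = &output[r * row_len];
    // Subtract the row max for numerical stability before exponentiating.
    float max = in[0];
    for (int i = 1; i < row_len; i++)
      if (in[i] > max)
        max = in[i];
    float sum = 0;
    for (int i = 0; i < row_len; i++) {
      out[i] = expf(in[i] - max);
      sum += out[i];
    }
    for (int i = 0; i < row_len; i++)
      out[i] = log_act ? logf(out[i] / sum) : out[i] / sum;
  }
}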
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/**
* zdnn_softmax_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_softmax_test(uint32_t *shape, zdnn_data_layouts layout, float *input,
zdnn_softmax_act act_func, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_status status;
/*
* Begin Testing!
*/
/* once with NULL workarea, once with self-allocated */
status = zdnn_softmax(input_ztensor, NULL, act_func, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_softmax() with activation function %d returned status %08x "
"but expected %08x\n",
act_func, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
zdnn_reset_ztensor(output_ztensor);
void *self_workarea = malloc_aligned_4k(ZDNN_SOFTMAX_SAVEAREA_SIZE);
TEST_ASSERT_MESSAGE_FORMATTED(
self_workarea, "%s() - can't allocate SOFTMAX workarea\n", __func__);
status = zdnn_softmax(input_ztensor, self_workarea, act_func, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_softmax() with activation function %d and provided "
"work_area returned status %08x but expected %08x\n",
act_func, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
free_aligned_4k(self_workarea);
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
-------------------------------------------------------------------------------
Softmax Basic
Layout: 3DS
-------------------------------------------------------------------------------
*/
/**
 * zdnn_softmax_basic_3ds
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as 3DS
* [[
* [[0.5], [1.0], [1.5]],
* [[2.0], [2.5], [3.0]],
* [[3.5], [4.0], [4.5]]
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[1.0], [1.0], [1.0]],
* [[1.0], [1.0], [1.0]],
* [[1.0], [1.0], [1.0]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[0.0], [0.0], [0.0]],
* [[0.0], [0.0], [0.0]],
* [[0.0], [0.0], [0.0]]
* ]]
*/
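/*
 * Editorial note: the shape used below is (3,3,1), so the innermost (class)
 * dimension holds a single element per position; softmax of a single value is
 * e^x / e^x = 1 and log-softmax is 0, which is why every expected value above
 * is 1.0 (or 0.0 with SOFTMAX_ACT_LOG).
 */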
void zdnn_softmax_basic_3ds() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5};
float expected_values[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE, ZDNN_OK,
expected_values);
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG, ZDNN_OK,
log_expected_values);
}
/*
-------------------------------------------------------------------------------
Softmax Basic
Layout: 3DS
-------------------------------------------------------------------------------
*/
/**
 * zdnn_softmax_balanced_3ds
*
* Balanced (pos and neg inputs) test to demonstrate softmax
*
* Input values as 3DS
* [[
* [[-2, -1.5], [-1, -0.5]],
* [[0.5, 1.0], [1.5, 2.0]],
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.37754068, 0.62245935], [0.37754068, 0.62245935]],
* [[0.37754068, 0.62245935], [0.37754068, 0.62245935]],
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-0.974077 -0.47407693], [-0.974077 -0.47407693]]
* [[-0.974077 -0.47407693], [-0.974077 -0.47407693]]
* ]]
*/
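/*
 * Editorial note: every row below is {x, x + 0.5} for some x; softmax is
 * invariant to adding a constant to a row, so all rows share the result
 *   softmax({0, 0.5}) = {1 / (1 + e^0.5), e^0.5 / (1 + e^0.5)}
 *                     ~= {0.37754068, 0.62245935}.
 */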
void zdnn_softmax_balanced_3ds() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {2, 2, 2}; // Will be same for in and out dim.
float input_values[] = {-2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2};
float expected_values[] = {0.37754068, 0.62245935, 0.37754068, 0.62245935,
0.37754068, 0.62245935, 0.37754068, 0.62245935};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE, ZDNN_OK,
expected_values);
float log_expected_values[] = {-0.974077, -0.47407693, -0.974077,
-0.47407693, -0.974077, -0.47407693,
-0.974077, -0.47407693};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG, ZDNN_OK,
log_expected_values);
}
/*
-------------------------------------------------------------------------------
Softmax Basic
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
 * zdnn_softmax_negative_3ds
 *
 * Negative-input test to demonstrate softmax
*
* Input values as NWC sized (1,1,8):
* [[
* [[-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32]],
* ]]
*
* Expected Output values as NWC sized (1,1,8) with no activation:
* [[
* [[0.656592, 0.161914, 0.117573,
* 0.041557, 0.014688, 0.005192,
* 0.001835 , 0.000649]],
* ]]
*
* Expected Output values as NWC sized (1,1,8) with log activation:
* [[
* [[-0.42069218, -1.8206921, -2.140692,
* -3.180692, -4.2206917, -5.260692,
* -6.300692, -7.3406916]],
* ]]
*
*/
void zdnn_softmax_negative_3ds() {
// Initialize the dimensions for our input tensor--ZDNN_3DS [C,W,N]
uint32_t shape[] = {1, 1, 8}; // Will be same for in and out dim.
float input_values[] = {-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32};
float expected_values[] = {0.656592, 0.161914, 0.117573, 0.041557,
0.014688, 0.005192, 0.001835, 0.000649};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE, ZDNN_OK,
expected_values);
float log_expected_values[] = {-0.42069218, -1.8206921, -2.140692,
-3.180692, -4.2206917, -5.260692,
-6.300692, -7.3406916};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG, ZDNN_OK,
log_expected_values);
}
/*
-------------------------------------------------------------------------------
Softmax Large
Layout: 3DS
-------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_basic_3ds_large
*
* Simple test of all positive input values
* Expect a mirror of the Input values as the Output values
*
* Input values as 3DS
* [[
* [[0.65536, 0.65100, 0.65200],
* [0.64000, 0.64100, 0.64200],
* [0.63000, 0.63100, 0.63200]],
* [[0.62000, 0.62100, 0.62200],
* [0.61000, 0.61100, 0.61200],
* [0.60000, 0.60100, 0.60200]],
* [[0.59000, 0.59100, 0.59200],
* [0.58000, 0.58100, 0.58200],
* [0.57000, 0.57100, 0.57200]]
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.33419162, 0.3327377, 0.33307064]
* [0.33300006, 0.33333322, 0.33366674]
* [0.33300006, 0.33333322, 0.33366674]]
* [[0.33300006, 0.3333332, 0.3336667]
* [0.33300006, 0.3333332, 0.3336667]
* [0.33300006, 0.3333332, 0.3336667]]
* [[0.33300003, 0.3333332, 0.3336667]
* [0.33300006, 0.33333322, 0.33366674]
* [0.33300006, 0.33333322, 0.33366674]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-1.0960407 -1.1004007 -1.0994008]
* [-1.0996126 -1.0986125 -1.0976126]
* [-1.0996126 -1.0986125 -1.0976126]]
* [[-1.0996126 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]]
* [[-1.0996127 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986125 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]]
* ]]
*/
void zdnn_softmax_basic_3ds_large() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 3};
float input_values[] = {0.65536, 0.65100, 0.65200, 0.64000, 0.64100, 0.64200,
0.63000, 0.63100, 0.63200, 0.62000, 0.62100, 0.62200,
0.61000, 0.61100, 0.61200, 0.60000, 0.60100, 0.60200,
0.59000, 0.59100, 0.59200, 0.58000, 0.58100, 0.58200,
0.57000, 0.57100, 0.57200};
float expected_values[] = {
0.33419162, 0.3327377, 0.33307064, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674, 0.33300006, 0.3333332, 0.3336667,
0.33300006, 0.3333332, 0.3336667, 0.33300006, 0.3333332, 0.3336667,
0.33300003, 0.3333332, 0.3336667, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE, ZDNN_OK,
expected_values);
float log_expected_values[] = {
-1.0960407, -1.1004007, -1.0994008, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986125, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996126, -1.0986127, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996127, -1.0986127, -1.0976126, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986127, -1.0976126};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG, ZDNN_OK,
log_expected_values);
}
/*
-------------------------------------------------------------------------------
Softmax Large
Layout: 3DS
-------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_balanced_3ds_large
*
* Input values as 3DS
* [[[ 0.9356609 , 1.0854305 , -0.93788373],
* [-0.5061547 , 1.3169702 , 0.7137579 ]],
* [[-0.4126717 , -0.40257987, 2.0713255 ],
* [-0.35911667, 0.3861619 , 1.9897066 ]],
* [[-0.2823396 , -0.5135972 , -0.8962833 ],
* [-0.0901652 , -0.73964226, -0.46269894]],
* [[ 0.42379895, 1.1180195 , 1.4442351 ],
* [-1.0771092 , 0.9014347 , -0.14529487]],
* [[ 1.173365 , 1.510687 , -0.46714714],
* [ 1.3281798 , 1.7365712 , -1.5435543 ]],
* [[ 0.35064182, 0.5708492 , -1.8452454 ],
* [ 0.9243176 , 0.57233644, -1.0959795 ]],
* [[-0.62557054, 0.686686 , 0.4222773 ],
* [-0.2146352 , -0.81243026, -1.1678637 ]],
* [[ 1.6384528 , 1.187959 , -2.5538385 ],
* [-0.39338952, 0.233341 , -1.6181145 ]],
* [[-0.8736809 , 0.05150718, 2.2328985 ],
* [ 2.8749912 , 0.08306922, -0.9871888 ]],
* [[ 0.47143334, -1.7806206 , -0.27681163],
* [-0.9240901 , 1.3088665 , 0.7826533 ]]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.43193838, 0.5017252, 0.06633637],
* [0.09453523, 0.5852842, 0.32018057]],
* [[0.07143247, 0.07215702, 0.85641056],
* [0.07363626, 0.15515368, 0.7712101 ]],
* [[0.42831188, 0.3398805, 0.23180765],
* [0.45222163, 0.23620388, 0.31157458]],
* [[0.17311363, 0.3465991, 0.48028725],
* [0.09283915, 0.67143184, 0.23572904]],
* [[0.38534594, 0.5399429, 0.07471115],
* [0.390473, 0.58742595, 0.02210104]],
* [[0.42416108, 0.5286468, 0.04719208],
* [0.5446892, 0.38307628, 0.07223454]],
* [[0.13216929, 0.49094895, 0.37688172],
* [0.51665765, 0.28417364, 0.19916865]],
* [[0.6051712, 0.3856837, 0.00914512],
* [0.31592378, 0.5912456, 0.09283058]],
* [[0.03865956, 0.09751265, 0.86382776],
* [0.9239366, 0.05664035, 0.01942311]],
* [[0.6335613, 0.06663986, 0.29979888],
* [0.06313774, 0.5889111, 0.34795114]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-0.83947235 -0.68970275 -2.713017 ]
* [-2.3587828 -0.53565776 -1.1388701 ]]
* [[-2.6390028 -2.6289108 -0.1550054 ]
* [-2.6086178 -1.8633392 -0.25979444]]
* [[-0.84790367 -1.0791612 -1.4618473 ]
* [-0.79358286 -1.4430599 -1.1661166 ]]
* [[-1.7538071 -1.0595865 -0.7333709 ]
* [-2.3768868 -0.39834276 -1.4450723 ]]
* [[-0.9536138 -0.6162919 -2.594126 ]
* [-0.9403964 -0.5320051 -3.8121307 ]]
* [[-0.857642 -0.6374347 -3.0535293 ]
* [-0.60753995 -0.9595212 -2.627837 ]]
* [[-2.0236716 -0.7114151 -0.9758239 ]
* [-0.6603748 -1.2581699 -1.6136034 ]]
* [[-0.5022439 -0.9527377 -4.6945353 ]
* [-1.1522543 -0.5255238 -2.376979 ]]
* [[-3.2529612 -2.327773 -0.14638188]
* [-0.07911182 -2.8710337 -3.9412918 ]]
* [[-0.4563985 -2.7084525 -1.2046435 ]
* [-2.7624366 -0.52948 -1.0556931 ]]]
* ]]
*/
void zdnn_softmax_balanced_3ds_large() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {10, 2, 3}; // Will be same for in and out dim.
float input_values[] = {
0.9356609, 1.0854305, -0.93788373, -0.5061547, 1.3169702,
0.7137579, -0.4126717, -0.40257987, 2.0713255, -0.35911667,
0.3861619, 1.9897066, -0.2823396, -0.5135972, -0.8962833,
-0.0901652, -0.73964226, -0.46269894, 0.42379895, 1.1180195,
1.4442351, -1.0771092, 0.9014347, -0.14529487, 1.173365,
1.510687, -0.46714714, 1.3281798, 1.7365712, -1.5435543,
0.35064182, 0.5708492, -1.8452454, 0.9243176, 0.57233644,
-1.0959795, -0.62557054, 0.686686, 0.4222773, -0.2146352,
-0.81243026, -1.1678637, 1.6384528, 1.187959, -2.5538385,
-0.39338952, 0.233341, -1.6181145, -0.8736809, 0.05150718,
2.2328985, 2.8749912, 0.08306922, -0.9871888, 0.47143334,
-1.7806206, -0.27681163, -0.9240901, 1.3088665, 0.7826533};
float expected_values[] = {
0.43193838, 0.5017252, 0.06633637, 0.09453523, 0.5852842, 0.32018057,
0.07143247, 0.07215702, 0.85641056, 0.07363626, 0.15515368, 0.7712101,
0.42831188, 0.3398805, 0.23180765, 0.45222163, 0.23620388, 0.31157458,
0.17311363, 0.3465991, 0.48028725, 0.09283915, 0.67143184, 0.23572904,
0.38534594, 0.5399429, 0.07471115, 0.390473, 0.58742595, 0.02210104,
0.42416108, 0.5286468, 0.04719208, 0.5446892, 0.38307628, 0.07223454,
0.13216929, 0.49094895, 0.37688172, 0.51665765, 0.28417364, 0.19916865,
0.6051712, 0.3856837, 0.00914512, 0.31592378, 0.5912456, 0.09283058,
0.03865956, 0.09751265, 0.86382776, 0.9239366, 0.05664035, 0.01942311,
0.6335613, 0.06663986, 0.29979888, 0.06313774, 0.5889111, 0.34795114};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE, ZDNN_OK,
expected_values);
float log_expected_values[] = {
-0.83947235, -0.68970275, -2.713017, -2.3587828, -0.53565776,
-1.1388701, -2.6390028, -2.6289108, -0.1550054, -2.6086178,
-1.8633392, -0.25979444, -0.84790367, -1.0791612, -1.4618473,
-0.79358286, -1.4430599, -1.1661166, -1.7538071, -1.0595865,
-0.7333709, -2.3768868, -0.39834276, -1.4450723, -0.9536138,
-0.6162919, -2.594126, -0.9403964, -0.5320051, -3.8121307,
-0.857642, -0.6374347, -3.0535293, -0.60753995, -0.9595212,
-2.627837, -2.0236716, -0.7114151, -0.9758239, -0.6603748,
-1.2581699, -1.6136034, -0.5022439, -0.9527377, -4.6945353,
-1.1522543, -0.5255238, -2.376979, -3.2529612, -2.327773,
-0.14638188, -0.07911182, -2.8710337, -3.9412918, -0.4563985,
-2.7084525, -1.2046435, -2.7624366, -0.52948, -1.0556931};
zdnn_softmax_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG, ZDNN_OK,
log_expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_basic_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_basic_3ds_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_balanced_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_negative_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_balanced_3ds_large);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_softmax_mask.c 0000664 0000000 0000000 00000111675 15000221702 0021572 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
// -----------------------------------------------------------------------------
// SoftmaxMask Unit Testing, for convenience, recall the following:
// softmax(x) -> [0,1]
// For some value x, we squash that value to some real-valued number within
// range [0,1] -- all components will indeed add up to one, which is mainly
// so that they can be interpreted as probabilities.
// For the behind the scenes:
// softmax(x_i) -> e(x_i) / ( e(x_1) + e(x_2) + ... + e(x_n-1) + e(x_n) )
// https://en.wikipedia.org/wiki/Softmax_function
//
// When mask > 0, only the first mask indices of the right-most dim are used.
// -----------------------------------------------------------------------------
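/*
 * Illustrative reference only (not part of the test suite; the helper name
 * below is hypothetical and the exact zAIU numerics are not reproduced): a
 * host-side sketch of the masked softmax described above. For mask == 0 the
 * whole right-most dimension participates; for mask > 0 only the first mask
 * entries do, and the tests below expect the remaining output entries to be
 * 0 (the output tensors start out filled with ZERO_ARRAY).
 *
 *   void reference_softmax_mask(const float *in, float *out, uint32_t n,
 *                               uint32_t mask) {
 *     uint32_t limit = (mask > 0) ? mask : n; // mask == 0 -> use whole row
 *     float sum = 0.0f;
 *     for (uint32_t i = 0; i < limit; i++)
 *       sum += expf(in[i]);
 *     for (uint32_t i = 0; i < limit; i++)
 *       out[i] = expf(in[i]) / sum; // SOFTMAX_ACT_LOG would be logf() of this
 *   }
 */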
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
}
void tearDown(void) {}
/**
* zdnn_softmax_mask_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_softmax_mask_test(uint32_t *shape, zdnn_data_layouts layout,
float *input, zdnn_softmax_act act_func,
uint32_t softmax_mask, zdnn_status expected_status,
float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
zdnn_status status;
/*
* Begin Testing!
*/
/* once with NULL workarea, once with self-allocated */
status = zdnn_softmax_mask(input_ztensor, NULL, act_func, softmax_mask,
output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_softmax_mask() with activation function %d and mask %u "
"returned status %08x but expected %08x\n",
act_func, softmax_mask, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
zdnn_reset_ztensor(output_ztensor);
void *self_workarea = malloc_aligned_4k(ZDNN_SOFTMAX_SAVEAREA_SIZE);
TEST_ASSERT_MESSAGE_FORMATTED(
self_workarea, "%s() - can't allocate SOFTMAX workarea\n", __func__);
status = zdnn_softmax_mask(input_ztensor, self_workarea, act_func,
softmax_mask, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_softmax_mask() with activation function %d, mask %u, and "
"provided work_area returned status %08x but expected %08x\n",
act_func, softmax_mask, status, expected_status);
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
free_aligned_4k(self_workarea);
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
------------------------------------------------------------------------------
SoftmaxMask Basic
Layout: 3DS
------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_mask_basic_3ds_
*
* Simple test of all positive input values
* With a right-most dimension of size 1, every softmax output is expected to
* be 1.0 (0.0 with log activation), regardless of the input values
*
* Input values as 3DS
* [[
* [[0.5], [1.0], [1.5]],
* [[2.0], [2.5], [3.0]],
* [[3.5], [4.0], [4.5]]
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[1.0], [1.0], [1.0]],
* [[1.0], [1.0], [1.0]],
* [[1.0], [1.0], [1.0]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[0.0], [0.0], [0.0]],
* [[0.0], [0.0], [0.0]],
* [[0.0], [0.0], [0.0]]
* ]]
*/
void zdnn_softmax_mask_basic_3ds() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5};
float expected_values[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask of 0 will use all of dimension 1
uint32_t softmax_mask = 0;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_basic_3ds_1() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5};
float expected_values[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 1;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
/*
------------------------------------------------------------------------------
SoftmaxMask Basic
Layout: 3DS
------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_mask_balanced_3ds_
*
* Balanced (pos and neg inputs) test to demonstrate softmax
*
* Input values as 3DS
* [[
* [[-2, -1.5], [-1, -0.5]],
* [[0.5, 1.0], [1.5, 2.0]],
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.37754068, 0.62245935], [0.37754068, 0.62245935]],
* [[0.37754068, 0.62245935], [0.37754068, 0.62245935]],
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-0.974077 -0.47407693], [-0.974077 -0.47407693]]
* [[-0.974077 -0.47407693], [-0.974077 -0.47407693]]
* ]]
*/
void zdnn_softmax_mask_balanced_3ds() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {2, 2, 2}; // Will be same for in and out dim.
float input_values[] = {-2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2};
float expected_values[] = {0.37754068, 0.62245935, 0.37754068, 0.62245935,
0.37754068, 0.62245935, 0.37754068, 0.62245935};
float log_expected_values[] = {-0.974077, -0.47407693, -0.974077,
-0.47407693, -0.974077, -0.47407693,
-0.974077, -0.47407693};
// Mask of 0 will use all of dimension 1
uint32_t softmax_mask = 0;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_balanced_3ds_1() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {2, 2, 2}; // Will be same for in and out dim.
float input_values[] = {-2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2};
float expected_values[] = {1.0, 0, 1.0, 0, 1.0, 0, 1.0, 0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 1;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_balanced_3ds_2() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {2, 2, 2}; // Will be same for in and out dim.
float input_values[] = {-2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2};
float expected_values[] = {0.37754068, 0.62245935, 0.37754068, 0.62245935,
0.37754068, 0.62245935, 0.37754068, 0.62245935};
float log_expected_values[] = {-0.974077, -0.47407693, -0.974077,
-0.47407693, -0.974077, -0.47407693,
-0.974077, -0.47407693};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 2;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
/*
------------------------------------------------------------------------------
SoftmaxMask Basic
Layout: ZDNN_3D
------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_mask_negative_3ds_
*
* Negative test to demonstrate softmax mask
*
* Input values as NWC sized (1,1,8):
* [[
* [[-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32]],
* ]]
*
* Expected Output values as NWC sized (1,1,8) with no activation:
* [[
* [[0.656592, 0.161914, 0.117573,
* 0.041557, 0.014688, 0.005192,
* 0.001835 , 0.000649]],
* ]]
*
* Expected Output values as NWC sized (1,1,8) with log activation:
* [[
* [[-0.42069218, -1.8206921, -2.140692,
* -3.180692, -4.2206917, -5.260692,
* -6.300692, -7.3406916]],
* ]]
*
*/
void zdnn_softmax_mask_negative_3ds() {
// Initialize the dimensions for our input tensor--ZDNN_3DS [C,W,N]
uint32_t shape[] = {1, 1, 8}; // Will be same for in and out dim.
float input_values[] = {-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32};
float expected_values[] = {0.656592, 0.161914, 0.117573, 0.041557,
0.014688, 0.005192, 0.001835, 0.000649};
float log_expected_values[] = {-0.42069218, -1.8206921, -2.140692,
-3.180692, -4.2206917, -5.260692,
-6.300692, -7.3406916};
// Mask of 0 will use all of dimension 1
uint32_t softmax_mask = 0;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_negative_3ds_1() {
// Initialize the dimensions for our input tensor--ZDNN_3DS [C,W,N]
uint32_t shape[] = {1, 1, 8}; // Will be same for in and out dim.
float input_values[] = {-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32};
float expected_values[] = {1.0, 0, 0, 0, 0, 0, 0, 0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 1;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_negative_3ds_2() {
// Initialize the dimensions for our input tensor--ZDNN_3DS [C,W,N]
uint32_t shape[] = {1, 1, 8}; // Will be same for in and out dim.
float input_values[] = {-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32};
float expected_values[] = {0.802184, 0.197816, 0, 0, 0, 0, 0, 0};
float log_expected_values[] = {-0.220417, -1.620417, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 2;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_negative_3ds_3() {
// Initialize the dimensions for our input tensor--ZDNN_3DS [C,W,N]
uint32_t shape[] = {1, 1, 8}; // Will be same for in and out dim.
float input_values[] = {-1.4, -2.8, -3.12, -4.16, -5.20, -6.24, -7.28, -8.32};
float expected_values[] = {0.701172, 0.172852, 0.125488, 0, 0, 0, 0, 0};
float log_expected_values[] = {-0.354492, -1.753906, -2.074219, 0,
0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 3;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
/*
------------------------------------------------------------------------------
SoftmaxMask Large
Layout: 3DS
------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_mask_basic_3ds_large
*
* Simple test of all positive input values
* The values within each right-most row are close together, so every softmax
* output is expected to be near 1/3
*
* Input values as 3DS
* [[
* [[0.65536, 0.65100, 0.65200],
* [0.64000, 0.64100, 0.64200],
* [0.63000, 0.63100, 0.63200]],
* [[0.62000, 0.62100, 0.62200],
* [0.61000, 0.61100, 0.61200],
* [0.60000, 0.60100, 0.60200]],
* [[0.59000, 0.59100, 0.59200],
* [0.58000, 0.58100, 0.58200],
* [0.57000, 0.57100, 0.57200]]
* ]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.33419162, 0.3327377, 0.33307064]
* [0.33300006, 0.33333322, 0.33366674]
* [0.33300006, 0.33333322, 0.33366674]]
* [[0.33300006, 0.3333332, 0.3336667]
* [0.33300006, 0.3333332, 0.3336667]
* [0.33300006, 0.3333332, 0.3336667]]
* [[0.33300003, 0.3333332, 0.3336667]
* [0.33300006, 0.33333322, 0.33366674]
* [0.33300006, 0.33333322, 0.33366674]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-1.0960407 -1.1004007 -1.0994008]
* [-1.0996126 -1.0986125 -1.0976126]
* [-1.0996126 -1.0986125 -1.0976126]]
* [[-1.0996126 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]]
* [[-1.0996127 -1.0986127 -1.0976126]
* [-1.0996126 -1.0986125 -1.0976126]
* [-1.0996126 -1.0986127 -1.0976126]]
* ]]
*/
void zdnn_softmax_mask_basic_3ds_large() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 3};
float input_values[] = {0.65536, 0.65100, 0.65200, 0.64000, 0.64100, 0.64200,
0.63000, 0.63100, 0.63200, 0.62000, 0.62100, 0.62200,
0.61000, 0.61100, 0.61200, 0.60000, 0.60100, 0.60200,
0.59000, 0.59100, 0.59200, 0.58000, 0.58100, 0.58200,
0.57000, 0.57100, 0.57200};
float expected_values[] = {
0.33419162, 0.3327377, 0.33307064, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674, 0.33300006, 0.3333332, 0.3336667,
0.33300006, 0.3333332, 0.3336667, 0.33300006, 0.3333332, 0.3336667,
0.33300003, 0.3333332, 0.3336667, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674};
float log_expected_values[] = {
-1.0960407, -1.1004007, -1.0994008, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986125, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996126, -1.0986127, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996127, -1.0986127, -1.0976126, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986127, -1.0976126};
// Mask of 0 will use all of dimension 1
uint32_t softmax_mask = 0;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_basic_3ds_large_1() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 3};
float input_values[] = {0.65536, 0.65100, 0.65200, 0.64000, 0.64100, 0.64200,
0.63000, 0.63100, 0.63200, 0.62000, 0.62100, 0.62200,
0.61000, 0.61100, 0.61200, 0.60000, 0.60100, 0.60200,
0.59000, 0.59100, 0.59200, 0.58000, 0.58100, 0.58200,
0.57000, 0.57100, 0.57200};
float expected_values[] = {1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 1;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_basic_3ds_large_2() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 3};
float input_values[] = {0.65536, 0.65100, 0.65200, 0.64000, 0.64100, 0.64200,
0.63000, 0.63100, 0.63200, 0.62000, 0.62100, 0.62200,
0.61000, 0.61100, 0.61200, 0.60000, 0.60100, 0.60200,
0.59000, 0.59100, 0.59200, 0.58000, 0.58100, 0.58200,
0.57000, 0.57100, 0.57200};
float expected_values[] = {
0.500977, 0.499023, 0, 0.500000, 0.500000, 0, 0.500000, 0.500000, 0,
0.500000, 0.500000, 0, 0.500000, 0.500000, 0, 0.500000, 0.500000, 0,
0.500000, 0.500000, 0, 0.500000, 0.500000, 0, 0.500000, 0.500000, 0};
float log_expected_values[] = {
-0.691406, -0.695312, 0, -0.693359, -0.692383, 0,
-0.693359, -0.692383, 0, -0.693359, -0.692383, 0,
-0.693359, -0.692383, 0, -0.693359, -0.692383, 0,
-0.693359, -0.692383, 0, -0.693359, -0.692383, 0,
-0.693359, -0.692383, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 2;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_basic_3ds_large_3() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 3};
float input_values[] = {0.65536, 0.65100, 0.65200, 0.64000, 0.64100, 0.64200,
0.63000, 0.63100, 0.63200, 0.62000, 0.62100, 0.62200,
0.61000, 0.61100, 0.61200, 0.60000, 0.60100, 0.60200,
0.59000, 0.59100, 0.59200, 0.58000, 0.58100, 0.58200,
0.57000, 0.57100, 0.57200};
float expected_values[] = {
0.33419162, 0.3327377, 0.33307064, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674, 0.33300006, 0.3333332, 0.3336667,
0.33300006, 0.3333332, 0.3336667, 0.33300006, 0.3333332, 0.3336667,
0.33300003, 0.3333332, 0.3336667, 0.33300006, 0.33333322, 0.33366674,
0.33300006, 0.33333322, 0.33366674};
float log_expected_values[] = {
-1.0960407, -1.1004007, -1.0994008, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986125, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996126, -1.0986127, -1.0976126, -1.0996126, -1.0986127, -1.0976126,
-1.0996127, -1.0986127, -1.0976126, -1.0996126, -1.0986125, -1.0976126,
-1.0996126, -1.0986127, -1.0976126};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 3;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
/*
------------------------------------------------------------------------------
SoftmaxMask Large
Layout: 3DS
------------------------------------------------------------------------------
*/
/**
* zdnn_softmax_mask_balanced_3ds_large
*
* Input values as 3DS
* [[[ 0.9356609 , 1.0854305 , -0.93788373],
* [-0.5061547 , 1.3169702 , 0.7137579 ]],
* [[-0.4126717 , -0.40257987, 2.0713255 ],
* [-0.35911667, 0.3861619 , 1.9897066 ]],
* [[-0.2823396 , -0.5135972 , -0.8962833 ],
* [-0.0901652 , -0.73964226, -0.46269894]],
* [[ 0.42379895, 1.1180195 , 1.4442351 ],
* [-1.0771092 , 0.9014347 , -0.14529487]],
* [[ 1.173365 , 1.510687 , -0.46714714],
* [ 1.3281798 , 1.7365712 , -1.5435543 ]],
* [[ 0.35064182, 0.5708492 , -1.8452454 ],
* [ 0.9243176 , 0.57233644, -1.0959795 ]],
* [[-0.62557054, 0.686686 , 0.4222773 ],
* [-0.2146352 , -0.81243026, -1.1678637 ]],
* [[ 1.6384528 , 1.187959 , -2.5538385 ],
* [-0.39338952, 0.233341 , -1.6181145 ]],
* [[-0.8736809 , 0.05150718, 2.2328985 ],
* [ 2.8749912 , 0.08306922, -0.9871888 ]],
* [[ 0.47143334, -1.7806206 , -0.27681163],
* [-0.9240901 , 1.3088665 , 0.7826533 ]]]
*
* Expected Output values as 3DS with no activation
* [[
* [[0.43193838, 0.5017252, 0.06633637],
* [0.09453523, 0.5852842, 0.32018057]],
* [[0.07143247, 0.07215702, 0.85641056],
* [0.07363626, 0.15515368, 0.7712101 ]],
* [[0.42831188, 0.3398805, 0.23180765],
* [0.45222163, 0.23620388, 0.31157458]],
* [[0.17311363, 0.3465991, 0.48028725],
* [0.09283915, 0.67143184, 0.23572904]],
* [[0.38534594, 0.5399429, 0.07471115],
* [0.390473, 0.58742595, 0.02210104]],
* [[0.42416108, 0.5286468, 0.04719208],
* [0.5446892, 0.38307628, 0.07223454]],
* [[0.13216929, 0.49094895, 0.37688172],
* [0.51665765, 0.28417364, 0.19916865]],
* [[0.6051712, 0.3856837, 0.00914512],
* [0.31592378, 0.5912456, 0.09283058]],
* [[0.03865956, 0.09751265, 0.86382776],
* [0.9239366, 0.05664035, 0.01942311]],
* [[0.6335613, 0.06663986, 0.29979888],
* [0.06313774, 0.5889111, 0.34795114]]
* ]]
*
* Expected Output values as 3DS with log activation
* [[
* [[-0.83947235 -0.68970275 -2.713017 ]
* [-2.3587828 -0.53565776 -1.1388701 ]]
* [[-2.6390028 -2.6289108 -0.1550054 ]
* [-2.6086178 -1.8633392 -0.25979444]]
* [[-0.84790367 -1.0791612 -1.4618473 ]
* [-0.79358286 -1.4430599 -1.1661166 ]]
* [[-1.7538071 -1.0595865 -0.7333709 ]
* [-2.3768868 -0.39834276 -1.4450723 ]]
* [[-0.9536138 -0.6162919 -2.594126 ]
* [-0.9403964 -0.5320051 -3.8121307 ]]
* [[-0.857642 -0.6374347 -3.0535293 ]
* [-0.60753995 -0.9595212 -2.627837 ]]
* [[-2.0236716 -0.7114151 -0.9758239 ]
* [-0.6603748 -1.2581699 -1.6136034 ]]
* [[-0.5022439 -0.9527377 -4.6945353 ]
* [-1.1522543 -0.5255238 -2.376979 ]]
* [[-3.2529612 -2.327773 -0.14638188]
* [-0.07911182 -2.8710337 -3.9412918 ]]
* [[-0.4563985 -2.7084525 -1.2046435 ]
* [-2.7624366 -0.52948 -1.0556931 ]]]
* ]]
*/
void zdnn_softmax_mask_balanced_3ds_large() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {10, 2, 3}; // Will be same for in and out dim.
float input_values[] = {
0.9356609, 1.0854305, -0.93788373, -0.5061547, 1.3169702,
0.7137579, -0.4126717, -0.40257987, 2.0713255, -0.35911667,
0.3861619, 1.9897066, -0.2823396, -0.5135972, -0.8962833,
-0.0901652, -0.73964226, -0.46269894, 0.42379895, 1.1180195,
1.4442351, -1.0771092, 0.9014347, -0.14529487, 1.173365,
1.510687, -0.46714714, 1.3281798, 1.7365712, -1.5435543,
0.35064182, 0.5708492, -1.8452454, 0.9243176, 0.57233644,
-1.0959795, -0.62557054, 0.686686, 0.4222773, -0.2146352,
-0.81243026, -1.1678637, 1.6384528, 1.187959, -2.5538385,
-0.39338952, 0.233341, -1.6181145, -0.8736809, 0.05150718,
2.2328985, 2.8749912, 0.08306922, -0.9871888, 0.47143334,
-1.7806206, -0.27681163, -0.9240901, 1.3088665, 0.7826533};
float expected_values[] = {
0.43193838, 0.5017252, 0.06633637, 0.09453523, 0.5852842, 0.32018057,
0.07143247, 0.07215702, 0.85641056, 0.07363626, 0.15515368, 0.7712101,
0.42831188, 0.3398805, 0.23180765, 0.45222163, 0.23620388, 0.31157458,
0.17311363, 0.3465991, 0.48028725, 0.09283915, 0.67143184, 0.23572904,
0.38534594, 0.5399429, 0.07471115, 0.390473, 0.58742595, 0.02210104,
0.42416108, 0.5286468, 0.04719208, 0.5446892, 0.38307628, 0.07223454,
0.13216929, 0.49094895, 0.37688172, 0.51665765, 0.28417364, 0.19916865,
0.6051712, 0.3856837, 0.00914512, 0.31592378, 0.5912456, 0.09283058,
0.03865956, 0.09751265, 0.86382776, 0.9239366, 0.05664035, 0.01942311,
0.6335613, 0.06663986, 0.29979888, 0.06313774, 0.5889111, 0.34795114};
float log_expected_values[] = {
-0.83947235, -0.68970275, -2.713017, -2.3587828, -0.53565776,
-1.1388701, -2.6390028, -2.6289108, -0.1550054, -2.6086178,
-1.8633392, -0.25979444, -0.84790367, -1.0791612, -1.4618473,
-0.79358286, -1.4430599, -1.1661166, -1.7538071, -1.0595865,
-0.7333709, -2.3768868, -0.39834276, -1.4450723, -0.9536138,
-0.6162919, -2.594126, -0.9403964, -0.5320051, -3.8121307,
-0.857642, -0.6374347, -3.0535293, -0.60753995, -0.9595212,
-2.627837, -2.0236716, -0.7114151, -0.9758239, -0.6603748,
-1.2581699, -1.6136034, -0.5022439, -0.9527377, -4.6945353,
-1.1522543, -0.5255238, -2.376979, -3.2529612, -2.327773,
-0.14638188, -0.07911182, -2.8710337, -3.9412918, -0.4563985,
-2.7084525, -1.2046435, -2.7624366, -0.52948, -1.0556931};
// Mask of 0 will use all of dimension 1
uint32_t softmax_mask = 0;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_balanced_3ds_large_1() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {10, 2, 3}; // Will be same for in and out dim.
float input_values[] = {
0.9356609, 1.0854305, -0.93788373, -0.5061547, 1.3169702,
0.7137579, -0.4126717, -0.40257987, 2.0713255, -0.35911667,
0.3861619, 1.9897066, -0.2823396, -0.5135972, -0.8962833,
-0.0901652, -0.73964226, -0.46269894, 0.42379895, 1.1180195,
1.4442351, -1.0771092, 0.9014347, -0.14529487, 1.173365,
1.510687, -0.46714714, 1.3281798, 1.7365712, -1.5435543,
0.35064182, 0.5708492, -1.8452454, 0.9243176, 0.57233644,
-1.0959795, -0.62557054, 0.686686, 0.4222773, -0.2146352,
-0.81243026, -1.1678637, 1.6384528, 1.187959, -2.5538385,
-0.39338952, 0.233341, -1.6181145, -0.8736809, 0.05150718,
2.2328985, 2.8749912, 0.08306922, -0.9871888, 0.47143334,
-1.7806206, -0.27681163, -0.9240901, 1.3088665, 0.7826533};
float expected_values[] = {1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0,
1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0, 1.0, 0, 0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 1;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_balanced_3ds_large_2() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {10, 2, 3}; // Will be same for in and out dim.
float input_values[] = {
0.9356609, 1.0854305, -0.93788373, -0.5061547, 1.3169702,
0.7137579, -0.4126717, -0.40257987, 2.0713255, -0.35911667,
0.3861619, 1.9897066, -0.2823396, -0.5135972, -0.8962833,
-0.0901652, -0.73964226, -0.46269894, 0.42379895, 1.1180195,
1.4442351, -1.0771092, 0.9014347, -0.14529487, 1.173365,
1.510687, -0.46714714, 1.3281798, 1.7365712, -1.5435543,
0.35064182, 0.5708492, -1.8452454, 0.9243176, 0.57233644,
-1.0959795, -0.62557054, 0.686686, 0.4222773, -0.2146352,
-0.81243026, -1.1678637, 1.6384528, 1.187959, -2.5538385,
-0.39338952, 0.233341, -1.6181145, -0.8736809, 0.05150718,
2.2328985, 2.8749912, 0.08306922, -0.9871888, 0.47143334,
-1.7806206, -0.27681163, -0.9240901, 1.3088665, 0.7826533};
float expected_values[] = {
0.462402, 0.537109, 0, 0.139160, 0.860352, 0, 0.497559, 0.502930, 0,
0.321777, 0.677734, 0, 0.557617, 0.442383, 0, 0.657227, 0.343262, 0,
0.333496, 0.666992, 0, 0.121582, 0.878906, 0, 0.416992, 0.583008, 0,
0.399414, 0.600586, 0, 0.444824, 0.554688, 0, 0.586914, 0.412598, 0,
0.212158, 0.788086, 0, 0.645508, 0.354980, 0, 0.611328, 0.389160, 0,
0.348145, 0.651367, 0, 0.283691, 0.715820, 0, 0.942383, 0.057739, 0,
0.905273, 0.095093, 0, 0.096924, 0.903320, 0};
float log_expected_values[] = {
-0.771484, -0.621094, 0, -1.972656, -0.149902, 0, -0.698242, -0.688477, 0,
-1.132812, -0.388672, 0, -0.583984, -0.815430, 0, -0.420410, -1.070312, 0,
-1.099609, -0.405273, 0, -2.105469, -0.129639, 0, -0.875000, -0.539062, 0,
-0.917969, -0.509766, 0, -0.809570, -0.588867, 0, -0.532227, -0.884766, 0,
-1.550781, -0.238281, 0, -0.438477, -1.037109, 0, -0.492676, -0.944336, 0,
-1.054688, -0.428223, 0, -1.259766, -0.333984, 0, -0.059509, -2.851562, 0,
-0.099976, -2.351562, 0, -2.335938, -0.101929, 0};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 2;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_balanced_3ds_large_3() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {10, 2, 3}; // Will be same for in and out dim.
float input_values[] = {
0.9356609, 1.0854305, -0.93788373, -0.5061547, 1.3169702,
0.7137579, -0.4126717, -0.40257987, 2.0713255, -0.35911667,
0.3861619, 1.9897066, -0.2823396, -0.5135972, -0.8962833,
-0.0901652, -0.73964226, -0.46269894, 0.42379895, 1.1180195,
1.4442351, -1.0771092, 0.9014347, -0.14529487, 1.173365,
1.510687, -0.46714714, 1.3281798, 1.7365712, -1.5435543,
0.35064182, 0.5708492, -1.8452454, 0.9243176, 0.57233644,
-1.0959795, -0.62557054, 0.686686, 0.4222773, -0.2146352,
-0.81243026, -1.1678637, 1.6384528, 1.187959, -2.5538385,
-0.39338952, 0.233341, -1.6181145, -0.8736809, 0.05150718,
2.2328985, 2.8749912, 0.08306922, -0.9871888, 0.47143334,
-1.7806206, -0.27681163, -0.9240901, 1.3088665, 0.7826533};
float expected_values[] = {
0.43193838, 0.5017252, 0.06633637, 0.09453523, 0.5852842, 0.32018057,
0.07143247, 0.07215702, 0.85641056, 0.07363626, 0.15515368, 0.7712101,
0.42831188, 0.3398805, 0.23180765, 0.45222163, 0.23620388, 0.31157458,
0.17311363, 0.3465991, 0.48028725, 0.09283915, 0.67143184, 0.23572904,
0.38534594, 0.5399429, 0.07471115, 0.390473, 0.58742595, 0.02210104,
0.42416108, 0.5286468, 0.04719208, 0.5446892, 0.38307628, 0.07223454,
0.13216929, 0.49094895, 0.37688172, 0.51665765, 0.28417364, 0.19916865,
0.6051712, 0.3856837, 0.00914512, 0.31592378, 0.5912456, 0.09283058,
0.03865956, 0.09751265, 0.86382776, 0.9239366, 0.05664035, 0.01942311,
0.6335613, 0.06663986, 0.29979888, 0.06313774, 0.5889111, 0.34795114};
float log_expected_values[] = {
-0.83947235, -0.68970275, -2.713017, -2.3587828, -0.53565776,
-1.1388701, -2.6390028, -2.6289108, -0.1550054, -2.6086178,
-1.8633392, -0.25979444, -0.84790367, -1.0791612, -1.4618473,
-0.79358286, -1.4430599, -1.1661166, -1.7538071, -1.0595865,
-0.7333709, -2.3768868, -0.39834276, -1.4450723, -0.9536138,
-0.6162919, -2.594126, -0.9403964, -0.5320051, -3.8121307,
-0.857642, -0.6374347, -3.0535293, -0.60753995, -0.9595212,
-2.627837, -2.0236716, -0.7114151, -0.9758239, -0.6603748,
-1.2581699, -1.6136034, -0.5022439, -0.9527377, -4.6945353,
-1.1522543, -0.5255238, -2.376979, -3.2529612, -2.327773,
-0.14638188, -0.07911182, -2.8710337, -3.9412918, -0.4563985,
-2.7084525, -1.2046435, -2.7624366, -0.52948, -1.0556931};
// Mask greater than 0 will use elements [0, mask] (inclusive) of dimension 1
uint32_t softmax_mask = 3;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_OK, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_OK, log_expected_values);
}
void zdnn_softmax_mask_invalid_mask() {
// Initialize the dimensions for our input tensor ZDNN_3DS
uint32_t shape[] = {3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5};
float expected_values[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
float log_expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Mask must be less than or equal to dimension 1
uint32_t softmax_mask = 2;
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_NONE,
softmax_mask, ZDNN_FUNC_RC_F002, expected_values);
zdnn_softmax_mask_test(shape, ZDNN_3DS, input_values, SOFTMAX_ACT_LOG,
softmax_mask, ZDNN_FUNC_RC_F002, log_expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds_large_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds_large_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_basic_3ds_large_3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_negative_3ds);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_negative_3ds_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_negative_3ds_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_negative_3ds_3);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_large_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_large_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_softmax_mask_balanced_3ds_large_3);
RUN_TEST(zdnn_softmax_mask_invalid_mask);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_sqrt.c 0000664 0000000 0000000 00000014433 15000221702 0020061 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2023, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
#include <math.h>
void setUp(void) {
VERIFY_HW_ENV;
VERIFY_PARMBLKFORMAT_1;
tol_bfloat.ulps = MAX_ULPS_BFLOAT;
tol_bfloat.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
// note: api_sqrt_med_dims (FP16)
// api_sqrt_high_dims (FP16)
// need custom tolerance
tol_fp16.ulps = MAX_ULPS_FP16;
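// Size epsilon_mult so that epsilon_mult * EPSILON_FP16 works out to just
// over 0.02 -- the wider custom tolerance the note above refers to.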
tol_fp16.epsilon_mult = (0.02 / EPSILON_FP16) + 1;
tol_fp32.ulps = MAX_ULPS_FLOAT;
tol_fp32.epsilon_mult = MAX_EPSILON_MULT_FLOAT;
}
void tearDown(void) {}
/*
* Simple test to drive a full sqrt api.
*/
void zdnn_sqrt_test(uint32_t *io_dims, zdnn_data_layouts layout, float *input,
zdnn_status expected_status, float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
io_dims, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_sqrt(input_ztensor, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_sqrt() returned status %08x but expected %08x\n", status,
expected_status);
// To allow for unique tolerance
fp_tolerance *tol = NULL;
switch (output_ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol = &tol_bfloat;
break;
case FP16:
tol = &tol_fp16;
break;
case FP32:
tol = &tol_fp32;
break;
default:
// should never get here
break;
}
if (expected_status == ZDNN_OK) {
assert_ztensor_values_adv(output_ztensor, false, expected_values, *tol);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
void api_sqrt_basic() {
/* Input values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [3, 10]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 3, 10};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1.732422, 5.476562], [2.449219, 7.742188]],
[[2.828125, 8.937500], [1.732422, 3.164062]]
]]
*/
float expected_values[] = {1.732422, 5.476562, 2.449219, 7.742188,
2.828125, 8.937500, 1.732422, 3.164062};
zdnn_sqrt_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
// test to drive input tensors with 280 values in their buffer.
void api_sqrt_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
generate_expected_output(sqrtf, input_values, num_io_buffer_values,
expected_values);
zdnn_sqrt_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
// test to drive an input tensor with 6825 values in its buffer
void api_sqrt_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
generate_expected_output(sqrtf, input_values, num_io_buffer_values,
expected_values);
zdnn_sqrt_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
* Simple test to drive a full sqrt api using data type and a 3D layout
*/
void api_sqrt_3D() {
/* Input 1 values as true NHWC sized (1,2,2,2)
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Expected values as true NHWC sized (1,2,2,2)
[[
[[1.732422, 5.476562], [2.449219, 7.742188]],
[[2.828125, 8.937500], [3, 9.484375]]
]]
*/
float expected_values[] = {1.732422, 5.476562, 2.449219, 7.742188,
2.828125, 8.937500, 3, 9.484375};
zdnn_sqrt_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/*
* Simple test to drive a full sqrt api using the data type and a 2D layout
*/
void api_sqrt_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC sized (1,1,2,2)
[[
[[1, 10], [2, 6]]
]]
*/
float input_values[] = {1, 10, 2, 6};
/* Expected values as true NHWC sized (1,1,2,2)
[[
[[1, 3.164062], [1.414062, 2.449219]]
]]
*/
float expected_values[] = {1, 3.164062, 1.414062, 2.449219};
zdnn_sqrt_test(shape, ZDNN_2D, input_values, ZDNN_OK, expected_values);
}
/*
* Simple test to drive a full sqrt api using the data type and a 1D layout
*/
void api_sqrt_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC sized (1,1,1,2)
[[
[[6, 7]]
]]
*/
float input_values[] = {6, 7};
/* Expected values as true NHWC sized (1,1,1,2)
[[
[[2.449219, 2.644531]]
]]
*/
float expected_values[] = {2.449219, 2.644531};
zdnn_sqrt_test(shape, ZDNN_1D, input_values, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sqrt_1D);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_sub_elwise.c 0000664 0000000 0000000 00000015032 15000221702 0021225 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_elwise.h"
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/*
* Simple test to drive a full sub api. Input tensor 1 has values greater than
* those in input tensor 2, so the result values will not be negative.
*/
void api_sub_basic() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 8, 6, 9, 30, 80, 60, 90};
/* Input 2 values as true NHWC
[[
[[1, 10], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 4, 2, 5, 10, 40, 20, 50};
/* Expected values as true NHWC
[[
[[2, 20], [4, 40]],
[[4, 40], [4, 40]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
// test to drive input tensors with 280 values in their buffer. All randomly
// generated numbers in first input tensor will be greater than or equal to
// those in the second input tensor to avoid negatives in the output tensor
void api_sub_med_dims() {
uint32_t shape[] = {1, 7, 10, 4};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
// test to drive input tensors with 6825 values in their buffer
void api_sub_high_dims() {
uint32_t shape[] = {1, 3, 33, 65};
int num_io_buffer_values = shape[0] * shape[1] * shape[2] * shape[3];
// Values in ZDNN_NHWC order
float input1_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input1_values);
// Values in ZDNN_NHWC order
float input2_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input2_values);
test_elwise_api_2_inputs(shape, ZDNN_NHWC, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
/*
* Simple test to drive a full sub api.
*/
void api_sub_3D() {
/* Input 1 values as true NHWC
[[
[[3, 30], [6, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2, 2};
float input1_values[] = {3, 30, 6, 60, 8, 80, 9, 90};
/* Input 2 values as true NHWC
[[
[[1, 10], [2, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 10, 2, 20, 4, 40, 5, 50};
/* Expected values as true NHWC
[[
[[2, 20], [4, 40]],
[[4, 40], [4, 40]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_3D, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
/*
* Simple test to drive a full sub api using the data type
* and 2 dimensional tensors
*/
void api_sub_2D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2, 2};
/* Input 1 values as true NHWC
[[
[[3, 20], [2, 20]]
]]
*/
float input1_values[] = {3, 20, 2, 20};
/* Input 2 values as true NHWC
[[
[[1, 10], [2, 5]]
]]
*/
float input2_values[] = {1, 10, 2, 5};
/* Expected values as true NHWC
[[
[[2, 10], [0, 15]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_2D, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
/*
* Simple test to drive a full sub api using the data type
* and 1 dimensional tensors
*/
void api_sub_1D() {
// Values in ZDNN_NHWC order
uint32_t shape[] = {2};
/* Input 1 values as true NHWC
[[
[[8, 4000]]
]]
*/
float input1_values[] = {8, 4000};
/* Input 2 values as true NHWC
[[
[[2.5, 12]]
]]
*/
float input2_values[] = {2.5, 12};
/* Expected values as true NHWC
[[
[[5.5, 3988]]
]]
*/
test_elwise_api_2_inputs(shape, ZDNN_1D, input1_values, input2_values,
NNPA_SUB, ZDNN_OK);
}
/*
* Simple test to drive a full sub api, resulting in underflow.
* Input tensor 1 holds a large negative value and input tensor 2 holds a
* large positive value in the same position, so when tensor 2 is subtracted
* from tensor 1 that result (-MAX_DLF16 * 1.5) falls below what DLFloat16
* can represent.
*/
void api_sub_underflow() {
/* Input 1 values as true NHWC
[[
[[3, 30], [-MAX_DLF16 * 0.75, 60]],
[[8, 80], [9, 90]]
]]
*/
// Values in ZDNN_NHWC order
uint32_t shape[] = {1, 2, 2, 2};
float input1_values[] = {3, 8, -MAX_DLF16 * 0.75, 9, 30, 80, 60, 90};
/* Input 2 values as true NHWC
[[
* [[1, 10], [MAX_DLF16 * 0.75, 20]],
[[4, 40], [5, 50]]
]]
*/
// Values in ZDNN_NHWC order
float input2_values[] = {1, 4, MAX_DLF16 * 0.75, 5, 10, 40, 20, 50};
/* Expected values as true NHWC
[[
[[2, 20], [UNDERFLOW, 40]],
[[4, 40], [4, 40]]
]]
*/
// when overflow/underflow happens, zAIU sets range violation flag
test_elwise_api_2_inputs_adv(shape, ZDNN_NHWC, FP32, input1_values,
input2_values, NNPA_SUB,
ZDNN_ELEMENT_RANGE_VIOLATION);
test_elwise_api_2_inputs_adv(shape, ZDNN_NHWC, BFLOAT, input1_values,
input2_values, NNPA_SUB,
ZDNN_ELEMENT_RANGE_VIOLATION);
// Note: We can't create an add/sub overflow/underflow with values that
// originate as FP16s, since FP16's max is way below the DLFloat max.
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_basic);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_med_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_high_dims);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_3D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_2D);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(api_sub_1D);
RUN_TEST(api_sub_underflow);
return UNITY_END();
}
zDNN-1.1.2/tests/testDriver_zdnn_tanh.c 0000664 0000000 0000000 00000077444 15000221702 0020035 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common_act.h"
// -----------------------------------------------------------------------------
// TanH Unit Testing, for convenience, recall the following:
// tanh(x) -> [-1,1]
// For some value x, we squash that value to some real-valued number within
// range [-1,1]. Negative inputs are mapped strongly negative and zero
// inputs are mapped near zero.
// For the behind the scenes:
// tanh(x) -> ( 1 - e(-2(x)) ) / ( 1 + e(-2(x)) )
// https://functions.wolfram.com/ElementaryFunctions/Tanh/
// introductions/Tanh/ShowAll.html
// -----------------------------------------------------------------------------
void setUp(void) { VERIFY_HW_ENV; }
void tearDown(void) {}
/**
* Helper function to compute output tensor values using activation
* tanh
*/
void act_tanh(const float input[], float output[], int num_elems) {
for (long i = 0; i < num_elems; i++) {
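// 2 / (1 + e^(-2x)) - 1 is algebraically identical to
// (1 - e^(-2x)) / (1 + e^(-2x)), i.e. tanh(x), matching the formula in the
// banner comment at the top of this file.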
output[i] = (2 / (1 + exp(-2 * input[i]))) - 1;
}
}
/**
* zdnn_tanh_test
*
* Handles all the logic to run custom tests.
*/
void zdnn_tanh_test(uint32_t *shape, zdnn_data_layouts layout, float *input,
zdnn_status expected_status, float *expected_values) {
/*
* Input Tensor
*/
zdnn_ztensor *input_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, false, input);
/*
* Output Tensor
*/
zdnn_ztensor *output_ztensor = alloc_ztensor_with_values(
shape, layout, test_datatype, NO_CONCAT, true, ZERO_ARRAY);
/*
* Begin Testing!
*/
zdnn_status status = zdnn_tanh(input_ztensor, output_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == expected_status,
"call to zdnn_tanh() to returned status %08x but expected "
"%08x\n",
status, expected_status);
// Only check expected values if we expected the NNPA call to be successful
if (expected_status == ZDNN_OK) {
assert_ztensor_values(output_ztensor, false, expected_values);
}
// All done--clean up the tensor buffers
free_ztensor_buffers(2, input_ztensor, output_ztensor);
}
/*
-------------------------------------------------------------------------------
TanH Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_nhwc
*
* Simple test to demonstrate tanh
*
* Input values as NHWC sized (1,3,3,1):
* [[
* [[0.01], [0.02], [0.03]],
* [[0.04], [0.05], [0.06]],
* [[0.07], [0.08], [0.09]]
* ]]
*
* Expected Output values as NHWC sized (1,3,3,1):
* [[
* [[0.00999966667999946], [0.019997333759930933], [0.029991003238820143]],
* [[0.03997868031116357], [0.04995837495787998], [0.059928103529143496]],
* [[0.06988589031642899], [0.07982976911113136], [0.0897577847471601]]
* ]]
*/
void zdnn_tanh_basic_nhwc_1() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
float expected_values[] = {
0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
0.03997868031116357, 0.04995837495787998, 0.059928103529143496,
0.06988589031642899, 0.07982976911113136, 0.0897577847471601};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_nhwc
*
* Zero test to demonstrate tanh
*
* Input values as NHWC sized (1, 3, 3, 3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*
* Expected Output values as NHWC sized (1, 3, 3, 3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*/
void zdnn_tanh_zeros_nhwc_1() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
float input_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_nhwc
*
* Negative test to demonstrate tanh
*
* Input values as NHWC sized (1,3,3,1):
* [[
* [[-0.01], [-0.02], [-0.03]],
* [[-0.04], [-0.05], [-0.06]],
* [[-0.07], [-0.08], [-0.09]]
* ]]
*
* Expected Output values as NHWC sized (1,3,3,1):
* [[
* [[-0.00999966667999946], [-0.019997333759930933],
* [-0.029991003238820143]],
* [[-0.03997868031116357], [-0.04995837495787998], [-0.059928103529143496]],
* [[-0.06988589031642899], [-0.07982976911113136], [-0.0897577847471601]]
* ]]
*/
void zdnn_tanh_negative_nhwc_1() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {-0.01, -0.02, -0.03, -0.04, -0.05,
-0.06, -0.07, -0.08, -0.09};
float expected_values[] = {
-0.00999966667999946, -0.019997333759930933, -0.029991003238820143,
-0.03997868031116357, -0.04995837495787998, -0.059928103529143496,
-0.06988589031642899, -0.07982976911113136, -0.0897577847471601};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_positive_nhwc
*
* Positive test to demonstrate tanh
*
* Input values as NHWC sized (4, 1, 1, 1)
* [[
* [[0.01]],
* [[0.02]],
* [[0.03]],
* [[0.04]],
* ]]
*
* Expected Output values as NHWC sized (4, 1, 1, 1):
* [[
* [[0.00999966667999946]],
* [[0.019997333759930933]],
* [[0.029991003238820143]],
* [[0.03997868031116357]],
* ]]
*/
void zdnn_tanh_positive_nhwc_1() {
uint32_t shape[] = {4, 1, 1, 1}; // Will be same for in and out dim.
float input_values[] = {0.01, 0.02, 0.03, 0.04};
float expected_values[] = {0.00999966667999946, 0.019997333759930933,
0.029991003238820143, 0.03997868031116357};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_nhwc
*
* Balanced (pos and neg inputs) test to demonstrate tanh
*
* Input values as NHWC sized (1, 1, 2, 6)
* [[
* [[-0.05, -0.04, -0.03, -0.02, -0.01, -0.00],
* [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]]
* ]]
*
* Expected Output values as NHWC sized (1, 1, 2, 6):
* [[
* [[-0.04995837495787998, -0.03997868031116357, -0.029991003238820143,
* -0.019997333759930933, -0.00999966667999946, 0.0],
* [0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
* 0.03997868031116357, 0.04995837495787998, 0.059928103529143496]]
* ]]
*/
void zdnn_tanh_balanced_nhwc_1() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 1, 2, 6}; // Will be same for in and out dim.
float input_values[] = {-0.05, -0.04, -0.03, -0.02, -0.01, 0.0,
0.01, 0.02, 0.03, 0.04, 0.05, 0.06};
float expected_values[] = {
-0.04995837495787998, -0.03997868031116357, -0.029991003238820143,
-0.019997333759930933, -0.00999966667999946, 0.0,
0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
0.03997868031116357, 0.04995837495787998, 0.059928103529143496};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
TanH Basic
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_3d
*
* Simple test to demonstrate tanh
*
* Input values as NWC sized (1,3,1):
* [[
* [[0.01], [0.02], [0.03]],
* [[0.04], [0.05], [0.06]],
* [[0.07], [0.08], [0.09]]
* ]]
*
* Expected Output values as NWC sized (1,3,1):
* [[
* [[0.00999966667999946], [0.019997333759930933], [0.029991003238820143]],
* [[0.03997868031116357], [0.04995837495787998], [0.059928103529143496]],
 * [[0.06988589031642899], [0.07982976911113136], [0.0897577847471601]]
* ]]
*/
void zdnn_tanh_basic_3d_1() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 1}; // Will be same for in and out dim.
float input_values[] = {0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
float expected_values[] = {
0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
0.03997868031116357, 0.04995837495787998, 0.059928103529143496,
0.06988589031642899, 0.07982976911113136, 0.0897577847471601};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_3d
*
* Zero test to demonstrate tanh
*
* Input values as NWC sized (1,3,3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*
* Expected Output values as NWC sized (1,3,3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*/
void zdnn_tanh_zeros_3d_1() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 3}; // Will be same for in and out dim.
float input_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_3d
*
* Negative test to demonstrate tanh
*
* Input values as NWC sized (1,3,3):
* [[
* [[-0.01], [-0.02], [-0.03]],
* [[-0.04], [-0.05], [-0.06]],
* [[-0.07], [-0.08], [-0.09]]
* ]]
*
* Expected Output values as NWC sized (1,3,3):
* [[
* [[-0.00999966667999946], [-0.019997333759930933],
* [-0.029991003238820143]],
* [[-0.03997868031116357], [-0.04995837495787998], [-0.059928103529143496]],
 * [[-0.06988589031642899], [-0.07982976911113136], [-0.0897577847471601]]
* ]]
*/
void zdnn_tanh_negative_3d_1() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 3}; // Will be same for in and out dim.
float input_values[] = {-0.01, -0.02, -0.03, -0.04, -0.05,
-0.06, -0.07, -0.08, -0.09};
float expected_values[] = {
-0.00999966667999946, -0.019997333759930933, -0.029991003238820143,
-0.03997868031116357, -0.04995837495787998, -0.059928103529143496,
-0.06988589031642899, -0.07982976911113136, -0.0897577847471601};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_positive_3d
*
* Positive test to demonstrate tanh
*
*
* Input values as NWC sized (4, 1, 1)
* [[
* [[0.01]],
* [[0.02]],
* [[0.03]],
* [[0.04]],
* ]]
*
* Expected Output values as NWC sized (4, 1, 1):
* [[
* [[0.00999966667999946]],
* [[0.019997333759930933]],
* [[0.029991003238820143]],
* [[0.03997868031116357]],
* ]]
*/
void zdnn_tanh_positive_3d_1() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {4, 1, 1}; // Will be same for in and out dim.
float input_values[] = {0.01, 0.02, 0.03, 0.04};
float expected_values[] = {0.00999966667999946, 0.019997333759930933,
0.029991003238820143, 0.03997868031116357};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_3d
*
* Balanced (pos and neg inputs) test to demonstrate tanh
*
* Input values as NWC sized (1, 2, 6)
* [[
* [[-0.05, -0.04, -0.03, -0.02, -0.01, -0.00],
* [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]]
* ]]
*
* Expected Output values as NWC sized (1, 2, 6):
* [[
* [[-0.04995837495787998, -0.03997868031116357, -0.029991003238820143,
* -0.019997333759930933, -0.00999966667999946, 0.0],
* [0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
* 0.03997868031116357, 0.04995837495787998, 0.059928103529143496]]
* ]]
*
*/
void zdnn_tanh_balanced_3d_1() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 2, 6}; // Will be same for in and out dim.
float input_values[] = {-0.05, -0.04, -0.03, -0.02, -0.01, 0.0,
0.01, 0.02, 0.03, 0.04, 0.05, 0.06};
float expected_values[] = {
-0.04995837495787998, -0.03997868031116357, -0.029991003238820143,
-0.019997333759930933, -0.00999966667999946, 0.0,
0.00999966667999946, 0.019997333759930933, 0.029991003238820143,
0.03997868031116357, 0.04995837495787998, 0.059928103529143496};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
TanH Basic
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_nhwc
*
* Simple test to demonstrate tanh
*
* Input values as NHWC sized (1,3,3,1):
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NHWC sized (1,3,3,1):
* [[
* [[0.761594156], [0.9640275801], [0.9950547537]],
* [[0.9993292997], [0.9999092043], [0.9999877117]],
 * [[0.9999983369], [0.9999997749], [0.9999999695]]
* ]]
*/
void zdnn_tanh_basic_nhwc_2() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float expected_values[] = {
0.761594156, 0.9640275801, 0.9950547537, 0.9993292997, 0.9999092043,
0.9999877117, 0.9999983369, 0.9999997749, 0.9999999695,
};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_nhwc
*
* Zero test to demonstrate tanh
*
* Input values as NHWC sized (1, 3, 3, 3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*
* Expected Output values as NHWC sized (1, 3, 3, 3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*/
void zdnn_tanh_zeros_nhwc_2() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 3}; // Will be same for in and out dim.
float input_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_nhwc
*
* Negative test to demonstrate tanh
*
* Input values as NHWC sized (1, 3, 3, 1):
* [[
* [[-1], [-2], [-3]],
* [[-4], [-5], [-6]],
* [[-7], [-8], [-9]]
* ]]
*
* Expected Output values as NHWC sized (1, 3, 3, 1):
* [[
* [[-0.761594156], [-0.9640275801], [-0.9950547537]],
* [[-0.9993292997], [-0.9999092043], [-0.9999877117]],
* [[-0.9999983369], [-0.9999997749], [-0.9999999695]]
* ]]
*/
void zdnn_tanh_negative_nhwc_2() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 3, 3, 1}; // Will be same for in and out dim.
float input_values[] = {-1, -2, -3, -4, -5, -6, -7, -8, -9};
float expected_values[] = {
-0.761594156, -0.9640275801, -0.9950547537, -0.9993292997, -0.9999092043,
-0.9999877117, -0.9999983369, -0.9999997749, -0.9999999695,
};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_positive_nhwc
*
* Positive test to demonstrate tanh
*
* Input values as NHWC sized (9, 1, 1, 1)
* [[
* [[1]],
* [[2]],
* [[3]],
* [[4]],
* [[5]],
* [[6]],
* [[7]],
* [[8]],
* [[9]],
* ]]
*
* Expected Output values as NHWC sized (9, 1, 1, 1):
* [[
* [[0.761594156]],
* [[0.9640275801]],
* [[0.9950547537]],
* [[0.9993292997]],
* [[0.9999092043]],
* [[0.9999877117]],
* [[0.9999983369]],
* [[0.9999997749]],
* [[0.9999999695]],
* ]]
*/
void zdnn_tanh_positive_nhwc_2() {
uint32_t shape[] = {9, 1, 1, 1}; // Will be same for in and out dim.
float input_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float expected_values[] = {
0.761594156, 0.9640275801, 0.9950547537, 0.9993292997, 0.9999092043,
0.9999877117, 0.9999983369, 0.9999997749, 0.9999999695,
};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_nhwc
*
* Balanced (pos and neg inputs) test to demonstrate tanh
*
* Input values as NHWC sized (1, 1, 3, 5)
* [[
* [[-4, -2, 0, 2, 4], [-3, -1, 0, 1, 3], [-8, -6, 0, 6, 8]]
* ]]
*
* Expected Output values as NHWC sized (1, 1, 3, 5):
* [[
* [[ -0.9993292997, -0.9640275801, 0.0, 0.9640275801, 0.9993292997],
* [-0.9950547537, -0.761594156, 0.0, 0.761594156, 0.9950547537],
* [-0.9999997749, -0.9999877117, 0.0, 0.9999877117, 0.9999997749]]
* ]]
*/
void zdnn_tanh_balanced_nhwc_2() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 1, 3, 5}; // Will be same for in and out dim.
float input_values[] = {-4, -2, 0, 2, 4, -3, -1, 0, 1, 3, -8, -6, 0, 6, 8};
float expected_values[] = {
-0.9993292997, -0.9640275801, 0.0, 0.9640275801, 0.9993292997,
-0.9950547537, -0.761594156, 0.0, 0.761594156, 0.9950547537,
-0.9999997749, -0.9999877117, 0.0, 0.9999877117, 0.9999997749,
};
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
TanH Basic
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_3d
*
* Simple test to demonstrate tanh
*
* Input values as NWC sized (1,3,1):
* [[
* [[1], [2], [3]],
* [[4], [5], [6]],
* [[7], [8], [9]]
* ]]
*
* Expected Output values as NWC sized (1,3,1):
* [[
* [[0.761594156], [0.9640275801], [0.9950547537]],
* [[0.9993292997], [0.9999092043], [0.9999877117]],
 * [[0.9999983369], [0.9999997749], [0.9999999695]]
* ]]
*/
void zdnn_tanh_basic_3d_2() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 1}; // Will be same for in and out dim.
float input_values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
float expected_values[] = {
0.761594156, 0.9640275801, 0.9950547537, 0.9993292997, 0.9999092043,
0.9999877117, 0.9999983369, 0.9999997749, 0.9999999695,
};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_3d
*
* Zero test to demonstrate tanh
*
* Input values as NWC sized (1,3,3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*
* Expected Output values as NWC sized (1,3,3):
* [[
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]],
* [[0,0,0], [0,0,0], [0,0,0]]
* ]]
*/
void zdnn_tanh_zeros_3d_2() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 3}; // Will be same for in and out dim.
float input_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
float expected_values[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_3d
*
* Negative test to demonstrate tanh
*
* Input values as NWC sized (1,3,3):
* [[
* [[-1.0], [-2.1], [-3.2]],
* [[-4.3], [-5.4], [-6.5]],
* [[-7.6], [-8.7], [-9.8]]
* ]]
*
* Expected Output values as NWC sized (1,3,3):
* [[
* [[-0.761594156], [-0.9704519366], [-0.9966823978]],
* [[-0.9996318562], [-0.9999592018], [-0.9999954794]],
* [[-0.9999994991], [-0.9999999445], [-0.9999999939]]
* ]]
*/
void zdnn_tanh_negative_3d_2() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 3}; // Will be same for in and out dim.
float input_values[] = {-1.0, -2.1, -3.2, -4.3, -5.4, -6.5, -7.6, -8.7, -9.8};
float expected_values[] = {
-0.761594156, -0.9704519366, -0.9966823978, -0.9996318562, -0.9999592018,
-0.9999954794, -0.9999994991, -0.9999999445, -0.9999999939,
};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_positive_3d
*
* Positive test to demonstrate tanh
*
 * Input values as NWC sized (8, 1, 1)
* [[
* [[1.0]],
* [[2.1]],
* [[3.2]],
* [[4.3]],
* [[5.4]],
* [[6.5]],
* [[7.6]],
* [[8.7]]
* ]]
*
* Expected Output values as NWC sized (8, 1, 1):
* [[
* [[0.761594156]],
* [[0.9704519366]],
* [[0.9966823978]],
* [[0.9996318562]],
* [[0.9999592018]],
* [[0.9999954794]],
* [[0.9999994991]],
* [[0.9999999445]]
* ]]
*/
void zdnn_tanh_positive_3d_2() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {8, 1, 1}; // Will be same for in and out dim.
float input_values[] = {1.0, 2.1, 3.2, 4.3, 5.4, 6.5, 7.6, 8.7};
float expected_values[] = {0.761594156, 0.9704519366, 0.9966823978,
0.9996318562, 0.9999592018, 0.9999954794,
0.9999994991, 0.9999999445};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_3d
*
* Balanced (pos and neg inputs) test to demonstrate tanh
*
* Input values as NWC sized (1, 3, 5)
* [[
* [[-4, -2, 0, 2, 4], [-3, -1, 0, 1, 3], [-8, -6, 0, 6, 8]]
* ]]
*
 * Expected Output values as NWC sized (1, 3, 5):
* [[
* [[ -0.9993292997, -0.9640275801, 0.0, 0.9640275801, 0.9993292997],
* [-0.9950547537, -0.761594156, 0.0, 0.761594156, 0.9950547537],
* [-0.9999997749, -0.9999877117, 0.0, 0.9999877117, 0.9999997749]]
* ]]
*
*/
void zdnn_tanh_balanced_3d_2() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {1, 3, 5}; // Will be same for in and out dim.
float input_values[] = {-4, -2, 0, 2, 4, -3, -1, 0, 1, 3, -8, -6, 0, 6, 8};
float expected_values[] = {
-0.9993292997, -0.9640275801, 0.0, 0.9640275801, 0.9993292997,
-0.9950547537, -0.761594156, 0.0, 0.761594156, 0.9950547537,
-0.9999997749, -0.9999877117, 0.0, 0.9999877117, 0.9999997749,
};
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
TANH Large
Layout: NHWC
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_nhwc_large
*
 * Simple test of positive input.
 *
 * Generate a test that is of size 43x30x15x1
 * and use automatic float generator to create
 * input values.
 *
 * Output will contain tensor of size 43x30x15x1.
*/
void zdnn_tanh_basic_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 15, 30, 43}; // Will be same for in and out dim.
int num_io_buffer_values = shape[3] * shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_nhwc_large
*
* Simple test of all zero input.
*
* Generate a test that is of size 80x40x20x1
 * and fill all input values with zero.
 *
 * Output will contain tensor of size 80x40x20x1
*/
void zdnn_tanh_zeros_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 20, 40, 80}; // Will be same for in and out dim.
int num_io_buffer_values = shape[3] * shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
fill_all_with_zero_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_nhwc_large
*
* Simple test of all negative input values.
*
 * Generate a test that is of size 83x28x10x1
 * and use automatic float generator to create
 * negative input values.
 *
 * Output will contain tensor of size 83x28x10x1
*/
void zdnn_tanh_negative_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 10, 28, 83}; // Will be same for in and out dim.
int num_io_buffer_values = shape[3] * shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_nhwc_large
*
* Simple test of half negative and positive inputs.
*
* Generate a test that is of size 56x12x10x1
* and use automatic float generator to create
* input values.
*
 * Output will contain tensor of size 56x12x10x1
*/
void zdnn_tanh_balanced_nhwc_large() {
// Initialize the dimensions for our input tensor ZDNN_NHWC
uint32_t shape[] = {1, 10, 12, 56}; // Will be same for in and out dim.
int num_io_buffer_values = shape[3] * shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_NHWC, input_values, ZDNN_OK, expected_values);
}
/*
-------------------------------------------------------------------------------
TANH Large
Layout: ZDNN_3D
-------------------------------------------------------------------------------
*/
/**
* zdnn_tanh_basic_3d_large
*
* Simple test of positive input.
*
* Generate a test that is of size 10x10x10.
* and use automatic float generator to create
* input values.
*
 * Output will contain tensor of size 10x10x10.
*/
void zdnn_tanh_basic_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {10, 10, 10}; // Will be same for in and out dim.
int num_io_buffer_values = shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
gen_random_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_zeros_3d_large
*
* Simple test of all zero input.
*
 * Generate a test that is of size 13x5x3
 * and fill all input values with zero.
 *
 * Output will contain tensor of size 13x5x3
*/
void zdnn_tanh_zeros_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {3, 5, 13}; // Will be same for in and out dim.
int num_io_buffer_values = shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
fill_all_with_zero_float_array(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_negative_3d_large
*
* Simple test of all negative input values.
*
* Generate a test that is of size 20x15x10
* and use automatic float generator to create
* input values.
*
 * Output will contain tensor of size 20x15x10
*/
void zdnn_tanh_negative_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {20, 15, 10}; // Will be same for in and out dim.
int num_io_buffer_values = shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
gen_random_float_array_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
/**
* zdnn_tanh_balanced_3d_large
*
* Simple test of half negative and positive inputs.
*
* Generate a test that is of size 30x3x3
* and use automatic float generator to create
* input values.
*
 * Output will contain tensor of size 30x3x3
*/
void zdnn_tanh_balanced_3d_large() {
// Initialize the dimensions for our input tensor ZDNN_3D
uint32_t shape[] = {3, 3, 30}; // Will be same for in and out dim.
int num_io_buffer_values = shape[2] * shape[1] * shape[0];
float input_values[num_io_buffer_values];
  gen_random_float_array_pos_neg(num_io_buffer_values, input_values);
float expected_values[num_io_buffer_values];
act_tanh(input_values, expected_values, num_io_buffer_values);
zdnn_tanh_test(shape, ZDNN_3D, input_values, ZDNN_OK, expected_values);
}
int main() {
UNITY_BEGIN();
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_positive_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_nhwc_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_3d_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_3d_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_3d_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_positive_3d_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_3d_1);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_positive_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_nhwc_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_nhwc_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_3d_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_3d_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_3d_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_positive_3d_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_3d_2);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_basic_3d_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_zeros_3d_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_negative_3d_large);
RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(zdnn_tanh_balanced_3d_large);
return UNITY_END();
}
zDNN-1.1.2/tests/testsupport.c 0000664 0000000 0000000 00000163216 15000221702 0016244 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Allows struct timeval to work on z/OS. Must come before the <sys/time.h> include.
#ifdef __MVS__
#define _XOPEN_SOURCE_EXTENDED 1
#undef _ALL_SOURCE
#endif
#include "testsupport.h"
#include "zdnn.h"
#include "zdnn_private.h"
#include <inttypes.h>
#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
char error_message[ERROR_MESSAGE_STR_LENGTH];
float ZERO_ARRAY[1] = {0};
// Custom FP tolerance for tests to set and use, if needed
fp_tolerance tol_bfloat = {0, 0}, tol_fp16 = {0, 0}, tol_fp32 = {0, 0};
zdnn_concat_info prev_layers[NUM_PREV_LAYERS] = {PREV_LAYER_UNI,
PREV_LAYER_BIDIR};
zdnn_concat_info biases_usages[NUM_BIASES_USAGES] = {USAGE_BIASES,
USAGE_HIDDEN_BIASES};
zdnn_concat_info no_vconcat_infos[NUM_NO_VCONCAT_INFOS] = {
PREV_LAYER_UNI | USAGE_HIDDEN_WEIGHTS,
PREV_LAYER_BIDIR | USAGE_HIDDEN_WEIGHTS,
PREV_LAYER_UNI | USAGE_WEIGHTS,
};
// a simple (dumb) routine to convert a NHWC datastream to NCHW
void nhwc_2_nchw(void *nhwc_ptr, uint32_t n, uint32_t h, uint32_t w, uint32_t c,
int element_size, void *nchw_ptr) {
uint32_t nx, hx, wx, cx;
for (nx = 0; nx < n; nx++) {
for (hx = 0; hx < h; hx++) {
for (wx = 0; wx < w; wx++) {
for (cx = 0; cx < c; cx++) {
uint64_t nhwc_idx = nx * (h * w * c) + hx * (w * c) + wx * (c) + cx;
uint64_t nchw_idx = nx * (c * h * w) + cx * (h * w) + hx * (w) + wx;
if (element_size == 2) {
((uint16_t *)nchw_ptr)[nchw_idx] = ((uint16_t *)nhwc_ptr)[nhwc_idx];
} else if (element_size == 4) {
((uint32_t *)nchw_ptr)[nchw_idx] = ((uint32_t *)nhwc_ptr)[nhwc_idx];
} else if (element_size == 8) {
((uint64_t *)nchw_ptr)[nchw_idx] = ((uint64_t *)nhwc_ptr)[nhwc_idx];
}
}
}
}
}
}
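// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original test suite): a minimal use of
// nhwc_2_nchw() on a tiny 1x2x2x2 buffer of 4-byte elements. Element
// (hx, wx, cx) moves from NHWC index hx*4 + wx*2 + cx to NCHW index
// cx*4 + hx*2 + wx, so all channel-0 values land in the first half of the
// output buffer and all channel-1 values in the second half.
// ---------------------------------------------------------------------------
static inline void demo_nhwc_2_nchw_tiny(void) {
  uint32_t nhwc[8] = {0, 1, 2, 3, 4, 5, 6, 7}; // values written in NHWC order
  uint32_t nchw[8];
  nhwc_2_nchw(nhwc, 1, 2, 2, 2, sizeof(uint32_t), nchw);
  // nchw now holds {0, 2, 4, 6, 1, 3, 5, 7}
}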
size_t *alloc_offsets(zdnn_ztensor *ztensor) {
// create offsets array using the formulas described in the z/Architecture
// Principles of Operation
uint64_t total_elements = get_num_elements(ztensor, ELEMENTS_PRE);
size_t *offsets = malloc(total_elements * sizeof(size_t));
uint32_t e4 = ztensor->transformed_desc->dim4,
e3 = ztensor->transformed_desc->dim3,
e2 = ztensor->transformed_desc->dim2,
e1 = ztensor->transformed_desc->dim1;
uint8_t eps = ztensor->transformed_desc->type != ZDNN_BINARY_INT8
? AIU_2BYTE_CELLS_PER_STICK
: AIU_1BYTE_CELLS_PER_STICK;
uint64_t c = 0;
switch (ztensor->transformed_desc->format) {
case ZDNN_FORMAT_4DFEATURE: {
uint32_t e2_limit = CEIL(e2, 32) * 32;
uint32_t e1_limit = CEIL(e1, eps) * eps;
for (uint32_t e4x = 0; e4x < e4; e4x++) {
for (uint32_t e3x = 0; e3x < e3; e3x++) {
for (uint32_t e2x = 0; e2x < e2; e2x++) {
for (uint32_t e1x = 0; e1x < e1; e1x++) {
offsets[c] =
( // get to the correct N = e4x
(e3 * e2_limit * e1_limit * e4x) +
// get to the correct H = e3x, assuming e1x = 0
(e2_limit * e3x * eps) +
// get to the correct stick (e2x), still assuming e1x = 0
(e2x * eps) +
// jump to the correct e1x = [0..63] [64..127] of that stick
((uint32_t)(e1x / eps) * e2_limit * e3 * eps) +
// jump to correct element within the stick
(e1x % eps)) *
(128 / eps);
c++;
}
}
}
}
if (ztensor->pre_transformed_desc->layout == ZDNN_NCHW) {
size_t *tmp = malloc(total_elements * sizeof(size_t));
nhwc_2_nchw(offsets, e4, e3, e2, e1, sizeof(size_t), tmp);
free(offsets);
offsets = tmp;
}
break;
}
case ZDNN_FORMAT_4DKERNEL: {
uint32_t e2_limit = CEIL(e2, 32) * 32;
for (uint32_t e4x = 0; e4x < e4; e4x++) {
for (uint32_t e3x = 0; e3x < e3; e3x++) {
for (uint32_t e2x = 0; e2x < e2; e2x++) {
for (uint32_t e1x = 0; e1x < e1; e1x++) {
offsets[c] =
( // jump to the correct e1x = [0..63] [64..127] of that stick
((uint32_t)(e1x / eps) * e4 * e3 * e2_limit * eps) +
// get to the correct W = e3x, assuming e1x = 0
(e2_limit * e3x * eps) +
// get to the correct stick (e2x), still assuming e1x = 0
(e2x * eps) +
// get to the correct H
(e4x * e3 * e2_limit * eps) +
// jump to correct element within the stick
(e1x % eps)) *
(128 / eps);
c++;
}
}
}
}
break;
}
case ZDNN_FORMAT_4DWEIGHTS: {
uint32_t e2_limit = CEIL(e2, 64) * 64;
uint32_t e1_limit = CEIL(e1, 64) * 64;
for (uint32_t e4x = 0; e4x < e4; e4x++) {
for (uint32_t e3x = 0; e3x < e3; e3x++) {
for (uint32_t e2x = 0; e2x < e2; e2x++) {
for (uint32_t e1x = 0; e1x < e1; e1x++) {
offsets[c] =
// get to the correct N = e4x
(e4x * e3 * e2_limit * e1_limit) +
// get to the correct H = e3x, assuming e1x = 0
(e3x * e2_limit * 64) +
// get to the correct stick
((uint32_t)(e2x / 2) * 128) +
// jump to the correct e1x = [0..63] [64..127] of that stick
((uint32_t)(e1x / 64) * e2_limit * e3 * 64) +
// jump to the correct pair within the stick
((e1x * 2) % 128) +
// jump to correct entry within that pair
(e2x % 2);
c++;
}
}
}
}
break;
}
default:
TEST_FAIL_MESSAGE_FORMATTED("unknown transformed descriptor format: %d",
ztensor->transformed_desc->format);
}
return offsets;
}
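// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original test suite): alloc_offsets()
// returns, for each pre-transformed element in row-major order, its byte
// offset within the stickified buffer. A caller could use it to verify
// individual cells of a transformed ztensor, assuming 2-byte DLFLOAT16 cells
// (the INT8 case would use 1-byte cells instead).
// ---------------------------------------------------------------------------
static inline void demo_verify_stickified_cells(zdnn_ztensor *ztensor,
                                                const uint16_t *expected_dlf16) {
  size_t *offsets = alloc_offsets(ztensor);
  uint64_t n = get_num_elements(ztensor, ELEMENTS_PRE);
  for (uint64_t i = 0; i < n; i++) {
    // offsets[i] is the byte offset of pre-transformed element i
    uint16_t cell = *(uint16_t *)((char *)ztensor->buffer + offsets[i]);
    TEST_ASSERT_EQUAL_HEX16(expected_dlf16[i], cell);
  }
  free(offsets);
}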
size_t *alloc_rnn_offsets(const zdnn_ztensor *ztensor) {
// generate basic offsets based off vanilla ZDNN_2DS/ZDNN_3DS shape
zdnn_tensor_desc slice_t_desc;
zdnn_ztensor slice_ztensor;
size_t *offsets, *slice_offsets = NULL;
if (ztensor->transformed_desc->layout != ZDNN_BIDIR_FICO &&
ztensor->transformed_desc->layout != ZDNN_BIDIR_ZRH) {
// ZDNN_FICO/ZDNN_ZRH is like having a stickified vanilla ZDNN_2DS/ZDNN_3DS
// stitched together 4 (FICO) or 3 (ZRH) times.
//
// so we get the basic stickified offsets for the ZDNN_2DS/ZDNN_3DS first,
// then duplicate it 2 or 3 more times while adding some offset to each
// value
zdnn_generate_transformed_desc(ztensor->pre_transformed_desc,
&slice_t_desc);
zdnn_init_ztensor(ztensor->pre_transformed_desc, &slice_t_desc,
&slice_ztensor);
slice_offsets = alloc_offsets(&slice_ztensor);
uint64_t slice_total_elements =
get_num_elements(&slice_ztensor, ELEMENTS_PRE);
uint64_t slice_size = zdnn_getsize_ztensor(slice_ztensor.transformed_desc);
short num_slices =
get_data_layout_num_gates(ztensor->transformed_desc->layout);
offsets = malloc(num_slices * slice_total_elements * sizeof(size_t));
// make num_slices copies of those offsets, each set is separated by
// slice_size bytes
for (uint64_t i = 0; i < num_slices; i++) {
for (uint64_t j = 0; j < slice_total_elements; j++) {
offsets[i * slice_total_elements + j] =
slice_offsets[j] + i * slice_size;
}
}
} else {
zdnn_tensor_desc tmp_f_desc, tmp_t_desc;
zdnn_ztensor tmp_ztensor;
// get the basic stickified offsets as if it were a ZDNN_3D of (2,
// PADDED(dim2 / 2), dim1).
// set dim3 = 2 to simulate the effect of dividing the entries into 2 halves
// don't care about num_dirs (dim3) for now
memcpy(&tmp_f_desc, ztensor->pre_transformed_desc,
sizeof(zdnn_tensor_desc));
tmp_f_desc.layout = ZDNN_3D;
tmp_f_desc.dim3 = 2;
tmp_f_desc.dim2 = PADDED(tmp_f_desc.dim2 / 2);
zdnn_generate_transformed_desc(&tmp_f_desc, &tmp_t_desc);
zdnn_init_ztensor(&tmp_f_desc, &tmp_t_desc, &tmp_ztensor);
size_t *tmp_offsets = alloc_offsets(&tmp_ztensor);
uint64_t tmp_ztensor_size =
zdnn_getsize_ztensor(tmp_ztensor.transformed_desc);
// we generated (2 * PADDED(dim2 / 2) * dim1) number of offsets, but we
// actually only care about (dim2 / 2 * dim1) of those
uint64_t slice_total_elements = ztensor->pre_transformed_desc->dim2 *
ztensor->pre_transformed_desc->dim1;
// in the generated offsets array, only the first (slice_total_elements / 2)
// entries are valid because the entries that follow are simply for the
// vertical padding.
//
// the 2 halves are actually PADDED(dim2 / 2) * AIU_BYTES_PER_STICK bytes
// apart
for (int q = 0; q < slice_total_elements / 2; q++) {
tmp_offsets[slice_total_elements / 2 + q] =
tmp_offsets[q] + (PADDED(ztensor->pre_transformed_desc->dim2 / 2) *
AIU_BYTES_PER_STICK);
}
short num_slices =
get_data_layout_num_gates(ztensor->transformed_desc->layout);
offsets = malloc(ztensor->pre_transformed_desc->dim3 * num_slices *
slice_total_elements * sizeof(size_t));
// make num_slices * num_dirs copies of those offsets, each set is separated
// by tmp_ztensor_size bytes
for (uint64_t i = 0; i < ztensor->pre_transformed_desc->dim3; i++) {
for (uint64_t j = 0; j < num_slices; j++) {
for (uint64_t k = 0; k < slice_total_elements; k++) {
offsets[i * num_slices * slice_total_elements +
j * slice_total_elements + k] =
tmp_offsets[k] + tmp_ztensor_size * (i * num_slices + j);
}
}
}
}
free(slice_offsets);
return offsets;
}
size_t *alloc_rnn_output_offsets(const zdnn_ztensor *ztensor) {
// basically the result is like (dim4 * dim3) pieces of ZDNN_2D (dim2, dim1)
// offsets stitched together, and every time we replicate a piece we add some
// offset to it
zdnn_tensor_desc tmp_p_desc, tmp_t_desc;
zdnn_ztensor tmp_ztensor;
// create a ZDNN_2D (dim2, dim1) tensor and get the offsets of that
zdnn_init_pre_transformed_desc(ZDNN_2D, test_datatype, &tmp_p_desc,
ztensor->pre_transformed_desc->dim2,
ztensor->pre_transformed_desc->dim1);
zdnn_generate_transformed_desc(&tmp_p_desc, &tmp_t_desc);
zdnn_status status =
zdnn_init_ztensor_with_malloc(&tmp_p_desc, &tmp_t_desc, &tmp_ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc() failed status = %08x",
status);
size_t *piece_offsets = alloc_offsets(&tmp_ztensor);
// each replication is separated by this many bytes
uint64_t piece_size = zdnn_getsize_ztensor(&tmp_t_desc);
size_t *offsets = malloc(get_num_elements(ztensor, ELEMENTS_PRE_SINGLE_GATE) *
sizeof(size_t));
// replicate the offsets dim4*dim3 times
uint64_t c = 0;
for (uint32_t i = 0; i < ztensor->pre_transformed_desc->dim4 *
ztensor->pre_transformed_desc->dim3;
i++) {
for (uint32_t j = 0; j < ztensor->pre_transformed_desc->dim2 *
ztensor->pre_transformed_desc->dim1;
j++) {
offsets[c] = piece_size * i + piece_offsets[j];
c++;
}
}
return offsets;
}
/// Creates a data buffer with the provided float values converted to the
/// specified type
///
/// \note This method does not check that the size of values matches expected
/// number of elements.
///
/// Example usage:
/// Setup input tensor
/// \code
/// void *data = alloc_and_convert_float_values(num_values, type, values);
/// \endcode
///
/// \param[in] type data type to convert the values into
/// \param[in] num_values number of values in the float array
/// \param[in] repeat_first_value if true, data will be populated with
/// values[0]
/// \param[in] values float array of values to convert and store in
/// data
///
/// \return a pointer with alloced memory containing the converted values
///
void *alloc_and_convert_float_values(zdnn_data_types type, uint64_t num_values,
bool repeat_first_value,
const float *values) {
// Malloc the data buffer
size_t data_size = num_values * get_data_type_size(type);
void *data = malloc(data_size);
memset(data, 0, data_size);
// Convert values into desired type and store in data buffer
for (uint64_t i = 0; i < num_values; i++) {
float value;
if (repeat_first_value) {
value = values[0];
} else {
value = values[i];
}
switch (type) {
case BFLOAT:
((uint16_t *)data)[i] = cnvt_1_fp32_to_bfloat(value);
break;
case FP16:
((uint16_t *)data)[i] = cnvt_1_fp32_to_fp16(value);
break;
case FP32:
((float *)data)[i] = value;
break;
default:
// NOTE: Along with undefined types, DLFLOAT types will also come down
// this path. zdnn_transform_ztensor() would fail with them as
// DLFLOATs are a stickified type and transform() expects unstickified
// data.
TEST_FAIL_MESSAGE_FORMATTED("unsupported type: %d", type);
}
}
return data;
}
/// Creates a ztensor with the provided values. Values are converted to the
/// specified type. The resulting ztensor is transformed and ready for use in
/// zDNN operations.
///
/// \note This method does not check that the size of values matches expected
/// number of elements.
///
/// Example usage:
/// Setup input tensor
/// \code
/// ztensor *zt = alloc_ztensor_with_values(shape, pre_tfrmd_layout,
/// type, NO_CONCAT, false, values);
/// \endcode
/// Setup Output tensor
/// \code
/// ztensor *zt = alloc_ztensor_with_values(shape, pre_tfrmd_layout,
/// type, NO_CONCAT, true,
/// ZERO_ARRAY);
/// \endcode
///
/// \param[in] shape array of dimensions
/// \param[in] pre_tfrmd_layout pre-transformed data layout
/// \param[in] type data type
/// \param[in] zdnn_concat_info
/// indicates the type of concatenation to use
/// This indirectly sets the transformed ztensor layout
/// and the number of values arrays to expect.
/// \param[in] repeat_first_value if true, ztensor will be populated with
/// values[0]
/// \param[in] ... float array(s) to tensor data or gates data.
/// 1 array for NO_CONCAT, 3 arrays for GRU, 4 arrays for LSTM
///
/// \return zdnn_ztensor* Pointer to a malloc'd ztensor with transformed data
///
zdnn_ztensor *alloc_ztensor_with_values(uint32_t *shape,
zdnn_data_layouts pre_tfrmd_layout,
zdnn_data_types type,
zdnn_concat_info info,
int repeat_first_value, ...) {
zdnn_status status = GENERAL_TESTCASE_FAILURE;
// Create the pretransformed description
zdnn_tensor_desc *pre_tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
switch (pre_tfrmd_layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0]);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1]);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1], shape[2]);
break;
case (ZDNN_4D):
case (ZDNN_4DS):
case (ZDNN_NHWC):
case (ZDNN_NCHW):
case (ZDNN_HWCK):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1], shape[2], shape[3]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"pre_tfrmd_layout. Could you teach me?",
get_data_layout_str(pre_tfrmd_layout));
break;
}
// Create the transformed description
zdnn_tensor_desc *tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
if (info == NO_CONCAT) {
status = zdnn_generate_transformed_desc(pre_tfrmd_desc, tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc failed (status = %08x)", status);
} else {
status = zdnn_generate_transformed_desc_concatenated(pre_tfrmd_desc, info,
tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_generate_transformed_desc_concatenated "
"with info %08x failed (status = %08x)",
info, status);
}
// Create the ztensor with malloc'd buffer pointer
zdnn_ztensor *ztensor = (zdnn_ztensor *)malloc(sizeof(zdnn_ztensor));
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
// Prepare to iterate over the passed in values arrays
va_list values_list;
va_start(values_list, repeat_first_value);
uint64_t num_elements = get_num_elements(ztensor, ELEMENTS_PRE_SINGLE_GATE);
if (pre_tfrmd_layout == ZDNN_4DS) {
// For testing outputs, we want to be able initialize rnn output ztensors
// to zeros but we don't need to support setting arbitrary values
memset(ztensor->buffer, 0, ztensor->buffer_size);
} else {
uint32_t num_things;
// Find out how many things to stickify
if (CONCAT_RNN_TYPE(info) == RNN_TYPE_LSTM) {
num_things = get_func_code_num_gates(NNPA_LSTMACT);
} else if (CONCAT_RNN_TYPE(info) == RNN_TYPE_GRU) {
num_things = get_func_code_num_gates(NNPA_GRUACT);
} else {
num_things = 1;
// the NO_CONCAT case, so we have 1 thing
}
void *values_data[num_things];
// Convert that many things
for (uint32_t i = 0; i < num_things; i++) {
values_data[i] = alloc_and_convert_float_values(
type, num_elements, repeat_first_value, va_arg(values_list, float *));
}
// Stickify ztensor using data that we type converted above
if (CONCAT_RNN_TYPE(info) == RNN_TYPE_LSTM) {
status = zdnn_transform_ztensor(ztensor, values_data[0], values_data[1],
values_data[2], values_data[3]);
} else if (CONCAT_RNN_TYPE(info) == RNN_TYPE_GRU) {
status = zdnn_transform_ztensor(ztensor, values_data[0], values_data[1],
values_data[2]);
} else {
status = zdnn_transform_ztensor(ztensor, values_data[0]);
}
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_transform_ztensor failed with status %08x \"%s\"", status,
zdnn_get_status_message(status));
for (uint32_t i = 0; i < num_things; i++) {
free(values_data[i]);
}
}
va_end(values_list);
return ztensor;
}
/// Creates a ztensor with no values. The resulting ztensor is not transformed
/// and is ready for use as an output in zDNN operations.
///
/// Example usage:
/// Setup input tensor
/// \code
/// ztensor *zt = alloc_ztensor_with_values(shape, pre_tfrmd_layout,
/// type, NO_CONCAT, false, values);
/// \endcode
/// Setup Output tensor
/// \code
/// ztensor *zt = alloc_output_ztensor(shape, pre_tfrmd_layout, type,
/// NO_CONCAT);
/// \endcode
///
/// \param[in] shape array of dimensions
/// \param[in] pre_tfrmd_layout pre-transformed data layout
/// \param[in] type data type
/// \param[in] zdnn_concat_info
/// indicates the type of concatenation to use
/// This indirectly sets the transformed ztensor layout
/// and the number of values arrays to expect.
///
/// \return zdnn_ztensor* Pointer to a malloc'd ztensor without transformed data
///
zdnn_ztensor *alloc_output_ztensor(uint32_t *shape,
zdnn_data_layouts pre_tfrmd_layout,
zdnn_data_types type,
zdnn_concat_info info) {
// Create the pretransformed description
zdnn_tensor_desc *pre_tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
switch (pre_tfrmd_layout) {
case (ZDNN_1D):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0]);
break;
case (ZDNN_2D):
case (ZDNN_2DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1]);
break;
case (ZDNN_3D):
case (ZDNN_3DS):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1], shape[2]);
break;
case (ZDNN_4D):
case (ZDNN_4DS):
case (ZDNN_NHWC):
case (ZDNN_NCHW):
case (ZDNN_HWCK):
zdnn_init_pre_transformed_desc(pre_tfrmd_layout, type, pre_tfrmd_desc,
shape[0], shape[1], shape[2], shape[3]);
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"pre_tfrmd_layout. Could you teach me?",
get_data_layout_str(pre_tfrmd_layout));
break;
}
// Create the transformed description
zdnn_tensor_desc *tfrmd_desc =
(zdnn_tensor_desc *)malloc(sizeof(zdnn_tensor_desc));
zdnn_status status;
if (info == NO_CONCAT) {
status = zdnn_generate_transformed_desc(pre_tfrmd_desc, tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK,
"zdnn_generate_transformed_desc failed (status = %08x)", status);
} else {
status = zdnn_generate_transformed_desc_concatenated(pre_tfrmd_desc, info,
tfrmd_desc);
TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
"zdnn_generate_transformed_desc_concatenated "
"with info %08x failed (status = %08x)",
info, status);
}
// Create the ztensor with malloc'd buffer pointer
zdnn_ztensor *ztensor = (zdnn_ztensor *)malloc(sizeof(zdnn_ztensor));
status = zdnn_init_ztensor_with_malloc(pre_tfrmd_desc, tfrmd_desc, ztensor);
TEST_ASSERT_MESSAGE_FORMATTED(
status == ZDNN_OK, "zdnn_init_ztensor_with_malloc failed (status = %08x)",
status);
return ztensor;
}
// -----------------------------------------------------------------------------
// ULP-based Floating Point Comparison Functions
// -----------------------------------------------------------------------------
// used to get around "breaking strict-aliasing rules"
typedef union float_int_u {
// cppcheck-suppress unusedStructMember
float f;
int i;
} float_int_u;
int ulps_diff_float(float a, float b) {
float_int_u au = {a};
float_int_u bu = {b};
// Make au.i lexicographically ordered as a twos-complement int
if (au.i < 0)
au.i = 0x80000000 - au.i;
// Make bu.i lexicographically ordered as a twos-complement int
if (bu.i < 0)
bu.i = 0x80000000 - bu.i;
return abs(au.i - bu.i);
}
int ulps_diff_16(uint16_t a, uint16_t b) {
int16_t a_int = *(int16_t *)&a;
int16_t b_int = *(int16_t *)&b;
// Make a_int lexicographically ordered as a twos-complement int
if (a_int < 0)
a_int = 0x8000 - a_int;
// Make b_int lexicographically ordered as a twos-complement int
if (b_int < 0)
b_int = 0x8000 - b_int;
return abs(a_int - b_int);
}
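// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original test suite): ulps_diff_float()
// counts how many representable floats lie between two values. Adjacent
// representable values are exactly 1 ULP apart, and the lexicographic
// remapping above keeps the count meaningful across the sign boundary.
// ---------------------------------------------------------------------------
static inline void demo_ulps_diff(void) {
  // 1.0f and the next representable float above it differ by 1 ULP
  int adjacent = ulps_diff_float(1.0f, nextafterf(1.0f, 2.0f));
  // identical values are 0 ULPs apart
  int identical = ulps_diff_float(0.5f, 0.5f);
  (void)adjacent;  // expected: 1
  (void)identical; // expected: 0
}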
// -----------------------------------------------------------------------------
// Floating Point Verify Functions
//
// - basic version (uses default fp_tolerance defined in testsupport.h)
// - advanced version, supply custom fp_tolerance
//
// Use ULPs comparison first, then epsilon as fallback
// -----------------------------------------------------------------------------
// advanced versions
bool almost_equal_bfloat_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol) {
// try ulps verification first, so we don't need to convert things to float
int ulps_diff = ulps_diff_16(actual, expected);
if (ulps_diff > tol.ulps) {
LOG_DEBUG("actual = %f, expected = %f: ulps diff = %d (max = %d)",
cnvt_1_bfloat_to_fp32(actual), cnvt_1_bfloat_to_fp32(expected),
ulps_diff, tol.ulps);
// epsilon verification
float diff =
fabs(cnvt_1_bfloat_to_fp32(actual) - cnvt_1_bfloat_to_fp32(expected));
float max_diff = EPSILON_BFLOAT * tol.epsilon_mult;
LOG_DEBUG(" diff = %f (max = %f)", diff, max_diff);
return !(diff > max_diff);
}
return true;
}
bool almost_equal_fp16_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol) {
// try ulps verification first, so we don't need to convert things to float
int ulps_diff = ulps_diff_16(actual, expected);
if (ulps_diff > tol.ulps) {
LOG_DEBUG("actual = %f, expected = %f: ulps diff = %d (max = %d)",
cnvt_1_fp16_to_fp32(actual), cnvt_1_fp16_to_fp32(expected),
ulps_diff, tol.ulps);
// epsilon verification
float diff =
fabs(cnvt_1_fp16_to_fp32(actual) - cnvt_1_fp16_to_fp32(expected));
float max_diff = EPSILON_FP16 * tol.epsilon_mult;
LOG_DEBUG(" diff = %f (max = %f)", diff, max_diff);
return !(diff > max_diff);
}
return true;
}
bool almost_equal_float_adv(float actual, float expected, fp_tolerance tol) {
// ulps-based verification
int ulps_diff = ulps_diff_float(actual, expected);
if (ulps_diff > tol.ulps) {
LOG_DEBUG("actual = %f, expected = %f: ulps diff = %d (max = %d)", actual,
expected, ulps_diff, tol.ulps);
// epsilon verification
float diff = fabs(actual - expected);
float max_diff = EPSILON_FLOAT * tol.epsilon_mult;
LOG_DEBUG(" diff = %f (max = %f)", diff, max_diff);
return !(diff > max_diff);
}
return true;
}
bool almost_equal_dlf16_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol) {
// try ulps verification first, so we don't need to convert things to float
int ulps_diff = ulps_diff_16(actual, expected);
if (ulps_diff > tol.ulps) {
LOG_DEBUG("actual = %f, expected = %f: ulps diff = %d (max = %d)",
cnvt_1_dlf16_to_fp32(actual), cnvt_1_dlf16_to_fp32(expected),
ulps_diff, tol.ulps);
// epsilon verification
float diff =
fabs(cnvt_1_dlf16_to_fp32(actual) - cnvt_1_dlf16_to_fp32(expected));
float max_diff = EPSILON_DLFLOAT16 * tol.epsilon_mult;
LOG_DEBUG(" diff = %f (max = %f)", diff, max_diff);
return !(diff > max_diff);
}
return true;
}
// basic versions, use default fp_tolerance.
bool almost_equal_bfloat(uint16_t actual, uint16_t expected) {
fp_tolerance tol = {MAX_ULPS_BFLOAT, MAX_EPSILON_MULT_BFLOAT};
return almost_equal_bfloat_adv(actual, expected, tol);
}
bool almost_equal_fp16(uint16_t actual, uint16_t expected) {
fp_tolerance tol = {MAX_ULPS_FP16, MAX_EPSILON_MULT_FP16};
return almost_equal_fp16_adv(actual, expected, tol);
}
bool almost_equal_float(float actual, float expected) {
fp_tolerance tol = {MAX_ULPS_FLOAT, MAX_EPSILON_MULT_FLOAT};
return almost_equal_float_adv(actual, expected, tol);
}
bool almost_equal_dlf16(uint16_t actual, uint16_t expected) {
fp_tolerance tol = {MAX_ULPS_DLFLOAT16, MAX_EPSILON_MULT_DLFLOAT16};
return almost_equal_dlf16_adv(actual, expected, tol);
}
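// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original test suite): how a test might
// use the basic vs. advanced comparison helpers. The basic form applies the
// default ULP/epsilon limits from testsupport.h; the advanced form takes a
// caller-supplied fp_tolerance, which is how tests relax the limits for ops
// with larger expected error.
// ---------------------------------------------------------------------------
static inline void demo_fp_tolerance(float actual, float expected) {
  // default tolerance (MAX_ULPS_FLOAT / MAX_EPSILON_MULT_FLOAT)
  bool ok_default = almost_equal_float(actual, expected);
  // custom tolerance: allow twice the default ULP and epsilon budget
  fp_tolerance loose = {2 * MAX_ULPS_FLOAT, 2 * MAX_EPSILON_MULT_FLOAT};
  bool ok_loose = almost_equal_float_adv(actual, expected, loose);
  (void)ok_default;
  (void)ok_loose;
}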
/// Asserts that each value in the stickified ztensor is within a specified
/// tolerance of the given expected float values.
///
/// \note This method does not check that the size of values array matches the
/// number of elements. If there's not enough expected values, the test will
/// likely fail when garbage data is pulled in as the expected value.
///
/// Example usage:
/// \code
/// assert_ztensor_values_adv(&ztensor, false, values, true, tol);
/// \endcode
///
/// \param[in] ztensor pointer to zdnn_ztensor with actual values
/// \param[in] repeat_first_expected_value if true, all ztensor values will be
/// compared to values[0]
/// \param[in] values array of expected values
/// \param[in] tol floating point tolerance information
///
/// \return None (assert fails if any actual value not within expected range)
///
void assert_ztensor_values_adv(zdnn_ztensor *ztensor,
bool repeat_first_expected_value, void *values,
fp_tolerance tol) {
// Read in the ZDNN_TEST_ERROR_ELEMENT_COUNT env var if set.
// It controls the number of errors that get printed to stdout when running
// tests. By default at most ERROR_ELEMENT_COUNT_MAX_DEFAULT (10) errors are
// printed per test. If ZDNN_TEST_ERROR_ELEMENT_COUNT=0, all informational
// output and errors will be printed to stdout.
uint64_t error_element_count_max = ERROR_ELEMENT_COUNT_MAX_DEFAULT;
bool always_print = false;
char *ptr = NULL;
if ((ptr = getenv(ENVVAR_TEST_ERROR_COUNT))) {
error_element_count_max = (uint64_t)strtoull(ptr, NULL, 10);
if (error_element_count_max == 0) {
always_print = true;
}
}
zdnn_status status;
zdnn_tensor_desc *pre_tfrmd_desc = ztensor->pre_transformed_desc;
uint64_t num_elements = 0;
switch (ztensor->transformed_desc->layout) {
case ZDNN_1D:
case ZDNN_2D:
case ZDNN_2DS:
case ZDNN_3D:
case ZDNN_3DS:
case ZDNN_4D:
case ZDNN_4DS:
case ZDNN_NHWC:
num_elements = get_num_elements(ztensor, ELEMENTS_PRE);
break;
case ZDNN_FICO:
case ZDNN_ZRH:
TEST_FAIL_MESSAGE_FORMATTED(
"does not support %s layout as we don't support unstickifying "
"concatenated ztensors.",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
default:
TEST_FAIL_MESSAGE_FORMATTED(
"I'm dreadfully sorry but I don't seem to know how to deal with a %s "
"layout. Could you teach me?",
get_data_layout_str(ztensor->transformed_desc->layout));
break;
}
// Malloc error_msg as it will be large if num_elements is large.
uint64_t big_error_message_size =
(uint64_t)sizeof(char) * ERROR_MESSAGE_STR_LENGTH * num_elements;
char *error_msg = malloc(big_error_message_size);
void *actual_vals, *expected_vals;
// Get unstickified data from ztensor to actual_vals[]
actual_vals = malloc(num_elements * get_data_type_size(pre_tfrmd_desc->type));
status = zdnn_transform_origtensor(ztensor, actual_vals);
snprintf(error_msg, big_error_message_size,
"zdnn_transform_origtensor failed (status = %08x)", status);
TEST_ASSERT_MESSAGE(status == ZDNN_OK, error_msg);
// expected_vals[] will contain the expected values (values[]) but in the
// same data type as actual_vals[], i.e., (pre_tfrmd_desc->type)
expected_vals =
malloc(num_elements * get_data_type_size(pre_tfrmd_desc->type));
// Instead of directly converting from C float to (pre_tfrmd_desc->type), we
// convert it to DLFLOAT16 first then (pre_tfrmd_desc->type) in order to
// simulate the precision loss the values have gone through. The same
// process applies for FP32.
for (uint64_t i = 0; i < num_elements; i++) {
// Handle INT32 case first, since it does not require a conversion.
if (pre_tfrmd_desc->type == INT32) {
((uint32_t *)expected_vals)[i] = ((uint32_t *)values)[i];
continue;
}
uint16_t tmp_dlf16;
if (!repeat_first_expected_value) {
tmp_dlf16 = cnvt_1_fp32_to_dlf16(((float *)values)[i]);
} else {
tmp_dlf16 = cnvt_1_fp32_to_dlf16(((float *)values)[0]);
}
switch (pre_tfrmd_desc->type) {
case BFLOAT:
((uint16_t *)expected_vals)[i] =
cnvt_1_fp32_to_bfloat(cnvt_1_dlf16_to_fp32(tmp_dlf16));
break;
case FP16:
((uint16_t *)expected_vals)[i] =
cnvt_1_fp32_to_fp16(cnvt_1_dlf16_to_fp32(tmp_dlf16));
break;
case FP32:
((float *)expected_vals)[i] = cnvt_1_dlf16_to_fp32(tmp_dlf16);
break;
default:
// NOTE: Along with undefined types, DLFLOAT types will also come down
// this path. DLFLOATs are stickified types which are not valid types
// for the pre_tfrmd_desc (i.e., the pre-stickified description).
snprintf(error_msg, big_error_message_size, "unsupported type: %d\n",
pre_tfrmd_desc->type);
TEST_FAIL_MESSAGE(error_msg);
break;
}
}
// Assert the ztensor's values (converted back to floats) match the expected
// values within the max ULPs and epsilon tolerance
bool all_pass = true;
// Loop appends to error_msg so reset it first
error_msg[0] = '\0';
char *info_fmt =
"Element %" PRIu64 " == %f expecting %f (within tolerance)\n";
char *info_fmt_int32 =
"Element %" PRIu64 " == %u expecting %u (within tolerance)\n";
char *error_fmt = "Element %" PRIu64 " == %f expecting %f";
char *error_fmt_int32 = "Element %" PRIu64 " == %u expecting %u";
char *error_fmt2 =
" <==== FAILED (diff beyond ULPs %u, epsilon multiplier %u)\n";
uint64_t error_count = 0;
// Compare the actual and expected values
for (uint64_t i = 0; i < num_elements; i++) {
bool is_almost_equal = false;
// new line at beginning of each test
if (i == 0) {
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), "\n");
}
switch (pre_tfrmd_desc->type) {
case BFLOAT: {
uint16_t actual = ((uint16_t *)actual_vals)[i];
uint16_t expected = ((uint16_t *)expected_vals)[i];
is_almost_equal = almost_equal_bfloat_adv(actual, expected, tol);
if (!is_almost_equal) {
// Test not within tolerance
if (error_count <= error_element_count_max || always_print) {
// print test failed if error_count < error_element_count_max
// or always_print=true (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i,
cnvt_1_bfloat_to_fp32(actual),
cnvt_1_bfloat_to_fp32(expected));
}
error_count++;
} else if (always_print) {
// Test within tolerance
// Output informational message only if always_print=true
// (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx (within
// tolerance)
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), info_fmt, i,
cnvt_1_bfloat_to_fp32(actual),
cnvt_1_bfloat_to_fp32(expected));
}
LOG_DEBUG(error_fmt, i, cnvt_1_bfloat_to_fp32(actual),
cnvt_1_bfloat_to_fp32(expected));
break;
}
case FP16: {
uint16_t actual = ((uint16_t *)actual_vals)[i];
uint16_t expected = ((uint16_t *)expected_vals)[i];
is_almost_equal = almost_equal_fp16_adv(actual, expected, tol);
if (!is_almost_equal) {
// Test not within tolerance
if (error_count < error_element_count_max || always_print) {
// print test failed if error_count < error_element_count_max
// or always_print=true (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i,
cnvt_1_fp16_to_fp32(actual), cnvt_1_fp16_to_fp32(expected));
}
error_count++;
} else if (always_print) {
// Test within tolerance
// Output informational message only if always_print=true
// (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx (within
// tolerance)
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), info_fmt, i,
cnvt_1_fp16_to_fp32(actual), cnvt_1_fp16_to_fp32(expected));
}
LOG_DEBUG(error_fmt, i, cnvt_1_fp16_to_fp32(actual),
cnvt_1_fp16_to_fp32(expected));
break;
}
case FP32: {
float actual = ((float *)actual_vals)[i];
float expected = ((float *)expected_vals)[i];
is_almost_equal = almost_equal_float_adv(actual, expected, tol);
if (!is_almost_equal) {
// Test not within tolerance
if (error_count < error_element_count_max || always_print) {
// print test failed if error_count < error_element_count_max
// or always_print=true (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt, i,
actual, expected);
}
error_count++;
} else if (always_print) {
// Test within tolerance
// Output informational message only if always_print=true
// (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx (within
// tolerance)
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), info_fmt, i,
actual, expected);
}
LOG_DEBUG(error_fmt, i, actual, expected);
break;
}
case INT32: {
uint32_t actual = ((uint32_t *)actual_vals)[i];
uint32_t expected = ((uint32_t *)expected_vals)[i];
is_almost_equal = (actual == expected);
if (!is_almost_equal) {
// Test not within tolerance
if (error_count <= error_element_count_max || always_print) {
// print test failed if error_count < error_element_count_max
// or always_print=true (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt_int32,
i, actual, expected);
}
error_count++;
} else if (always_print) {
// Test within tolerance
// Output informational message only if always_print=true
// (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// prints message like: Element xxxx == xxxx expecting xxxx (within
// tolerance)
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), info_fmt_int32, i,
actual, expected);
}
LOG_DEBUG(error_fmt_int32, i, actual, expected);
break;
}
default:
// would have died earlier
break;
}
// Only print when not within tolerance and error_count <=
// error_element_count_max OR always_print (ZDNN_TEST_ERROR_ELEMENT_COUNT=0)
// is true. Prints message like:
// <==== FAILED (diff beyond ULPs X, epsilon multiplier X)
if ((!is_almost_equal) &&
(error_count <= error_element_count_max || always_print)) {
all_pass = false;
snprintf(error_msg + strlen(error_msg),
big_error_message_size - strlen(error_msg), error_fmt2, tol.ulps,
tol.epsilon_mult);
}
}
// Assert that all passed and clean up temp data
TEST_ASSERT_MESSAGE(all_pass, error_msg);
free(expected_vals);
free(actual_vals);
free(error_msg);
}
void assert_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value, void *values) {
fp_tolerance tol = {0, 0}; // zero tolerance ==> testcase will likely fail.
switch (ztensor->pre_transformed_desc->type) {
case BFLOAT:
tol.ulps = MAX_ULPS_BFLOAT;
tol.epsilon_mult = MAX_EPSILON_MULT_BFLOAT;
break;
case FP16:
tol.ulps = MAX_ULPS_FP16;
tol.epsilon_mult = MAX_EPSILON_MULT_FP16;
break;
case FP32:
tol.ulps = MAX_ULPS_FLOAT;
tol.epsilon_mult = MAX_EPSILON_MULT_FLOAT;
break;
default:
// let assert_ztensor_values_adv() deal with it
break;
}
assert_ztensor_values_adv(ztensor, repeat_first_expected_value, values, tol);
}
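// Usage sketch (illustrative only, not part of the library): when an op is
// expected to lose more precision than the defaults allow, a test can widen
// the tolerance explicitly. "output_ztensor" and "expected_values" are
// hypothetical objects a real test would already have built.
//
//   fp_tolerance loose_tol = {.ulps = MAX_ULPS_FP16 * 2,
//                             .epsilon_mult = MAX_EPSILON_MULT_FP16 * 2};
//   assert_ztensor_values_adv(output_ztensor, false, expected_values,
//                             loose_tol);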
/// Free buffers, descriptors, and ztensor structs for all provided ztensors
///
/// \param[in] num_ztensors number of ztensor pointers passed into this
/// method
/// \param[in] ... variable number of ztensor pointers
///
/// \return None (assert fails if freeing any buffer fails)
///
void free_ztensor_buffers(uint32_t num_ztensors, ...) {
  // Create ztensor_list to handle the multiple input ztensors passed in.
va_list ztensor_list;
va_start(ztensor_list, num_ztensors);
// Free data buffer for each provided ztensor
for (uint32_t i = 0; i < num_ztensors; i++) {
zdnn_status status;
zdnn_ztensor *ztensor = va_arg(ztensor_list, zdnn_ztensor *);
if ((status = zdnn_free_ztensor_buffer(ztensor)) != ZDNN_OK) {
TEST_FAIL_MESSAGE_FORMATTED(
"zdnn_free_ztensor_buffer() failed on tensor %u with status %08x", i,
status);
}
free(ztensor->transformed_desc);
free(ztensor->pre_transformed_desc);
free(ztensor);
}
va_end(ztensor_list);
}
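// Usage sketch (illustrative): since free_ztensor_buffers() is variadic, a
// test can tear down all of its tensors in one call. "input_ztensor" and
// "output_ztensor" are hypothetical tensors created earlier with
// alloc_ztensor_with_values() / alloc_output_ztensor().
//
//   free_ztensor_buffers(2, input_ztensor, output_ztensor);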
/// Initializes seed for rand() function.
/// If the environment variable ZDNN_TEST_RANDOM_SEED is set to a nonzero
/// numeric string, that value is used as the seed. If ZDNN_TEST_RANDOM_SEED
/// is unset, 0, or non-numeric, a time-based seed is generated instead.
///
/// \return None
///
void setup_random_seed() {
char *env_seed = getenv(ENVVAR_TEST_RANDOM_SEED);
int seed;
if (env_seed != NULL && (int)(strtoull(env_seed, NULL, 10)) != 0) {
    seed = (int)(strtoull(env_seed, NULL, 10));
    // the cast truncates the unsigned long long parse result to int
} else {
struct timeval t1;
gettimeofday(&t1, NULL);
seed = (int)(t1.tv_sec * t1.tv_usec);
}
srand(seed);
printf("Using seed: %d\n", seed);
}
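// Reproducing a failure (illustrative): the seed printed above can be fed
// back through ZDNN_TEST_RANDOM_SEED so the same random data is regenerated
// on the next run. The binary name below is hypothetical.
//
//   ZDNN_TEST_RANDOM_SEED=12345 ./testDriver_something
//
// or, from C before the data-generation helpers run (requires <stdlib.h>):
//
//   setenv("ZDNN_TEST_RANDOM_SEED", "12345", 1);
//   setup_random_seed();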
/// Allocates a data buffer then fills it with random float values (between
/// SMALLEST_RANDOM_FP and 1)
///
/// \param[out] ztensor A zdnn tensor
///
/// \return pointer to filled data buffer
///
unsigned char *create_and_fill_random_fp_data(zdnn_ztensor *ztensor) {
  // ELEMENTS_PRE_SINGLE_GATE looks at just the pre_tfrmd shape, which matches
  // the tfrmd size for everything but concat cases. For concat tests that use
  // this helper, we want the single-gate size specifically because we generate
  // the data for each gate (RNN gate) separately.
uint64_t num_elements = get_num_elements(ztensor, ELEMENTS_PRE_SINGLE_GATE);
zdnn_data_types dtype = ztensor->pre_transformed_desc->type;
void *data = malloc(num_elements * get_data_type_size(dtype));
setup_random_seed();
  for (uint64_t i = 0; i < num_elements; i++) {
float filling = 0;
// https://stackoverflow.com/questions/13408990/how-to-generate-random-float-number-in-c
while (filling < SMALLEST_RANDOM_FP) {
filling = (float)rand() / (float)(RAND_MAX);
}
switch (dtype) {
case BFLOAT:
((uint16_t *)data)[i] = cnvt_1_fp32_to_bfloat(filling);
break;
case FP16:
((uint16_t *)data)[i] = cnvt_1_fp32_to_fp16(filling);
break;
case FP32:
((float *)data)[i] = filling;
break;
    case ZDNN_DLFLOAT16:
      ((uint16_t *)data)[i] = cnvt_1_fp32_to_dlf16(filling);
      break;
    default:
      LOG_WARN("Unknown data type: %d", dtype);
}
}
return data;
}
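// Usage sketch (illustrative): the returned buffer is sized from the
// pre-transformed descriptor, so it can be handed to zdnn_transform_ztensor()
// and must be freed by the caller. "ztensor" is a hypothetical tensor
// allocated earlier in the test.
//
//   void *raw_data = create_and_fill_random_fp_data(ztensor);
//   zdnn_status status = zdnn_transform_ztensor(ztensor, raw_data);
//   // ... run the op and assertions ...
//   free(raw_data);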
/// Allocates a data buffer then fills it with random INT8 values
///
/// \param[out] ztensor A zdnn tensor
///
/// \return pointer to filled data buffer
///
int8_t *create_and_fill_random_int8_data(zdnn_ztensor *ztensor) {
uint64_t num_elements = get_num_elements(ztensor, ELEMENTS_PRE_SINGLE_GATE);
int8_t *data = (int8_t *)malloc(num_elements);
setup_random_seed();
int upper = 127, lower = -128;
  for (uint64_t i = 0; i < num_elements; i++) {
data[i] = (rand() % (upper - lower + 1)) + lower;
}
return data;
}
/**
 * Helper that generates random floats and populates the given array. This will
* be used for populating tensor buffers in the end-to-end unit tests.
*
* https://stackoverflow.com/questions/13408990/how-to-generate-random-float-number-in-c
*/
void gen_random_float_array(int size, float arr[]) {
setup_random_seed();
  // rand()/RAND_MAX scaled by desired_max falls in [0, desired_max]. Adding
  // SMALLEST_RANDOM_FP keeps every value at least SMALLEST_RANDOM_FP away from
  // zero, and desired_max is reduced by the same amount so the final value
  // still stays within LARGEST_RANDOM_FP.
float desired_max = LARGEST_RANDOM_FP - SMALLEST_RANDOM_FP;
for (int i = 0; i < size; i++) {
arr[i] =
((float)rand() / (float)(RAND_MAX)) * desired_max + SMALLEST_RANDOM_FP;
}
}
/**
 * Helper that generates random negative floats and populates the given
* array. This will be used for populating tensor buffers in the end-to-end
* unit tests.
*/
void gen_random_float_array_neg(int size, float arr[]) {
setup_random_seed();
  // Negative counterpart of gen_random_float_array(): values are drawn with
  // magnitude in [SMALLEST_RANDOM_FP, LARGEST_RANDOM_FP] and negated.
float desired_max = LARGEST_RANDOM_FP - SMALLEST_RANDOM_FP;
for (int i = 0; i < size; i++) {
    arr[i] = -(((float)rand() / (float)(RAND_MAX)) * desired_max +
               SMALLEST_RANDOM_FP);
}
}
/**
* Helper that generates random negative and positive float values for a given
* size and for a given array, meant for populating tensor buffers in
* end-to-end unit tests.
*
 * Every other array index (the odd indices) will be negative:
 *
 * Example: [1, -2, 3, -4, 5, -6]
*/
void gen_random_float_array_pos_neg(int size, float arr[]) {
setup_random_seed();
float desired_max = LARGEST_RANDOM_FP - SMALLEST_RANDOM_FP;
for (int i = 0; i < size; i++) {
arr[i] = (((float)rand() / (float)(RAND_MAX)) * desired_max +
SMALLEST_RANDOM_FP) *
((i % 2 == 0) ? 1 : -1);
}
}
/**
 * Helper that generates random floats within [min, max] and populates the
 * given array. This will be used for populating tensor buffers in the
 * end-to-end unit tests.
*
* https://stackoverflow.com/questions/13408990/how-to-generate-random-float-number-in-c
*/
void gen_random_float_array_range(int size, float arr[], float min, float max) {
setup_random_seed();
// The raw output value will be [min, max].
for (int i = 0; i < size; i++) {
arr[i] = min + ((float)rand() / (float)(RAND_MAX)) * (max - min);
}
}
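// Usage sketch (illustrative): confining input data to a range an op
// tolerates. The array size and bounds are arbitrary example values.
//
//   float input_values[6];
//   gen_random_float_array_range(6, input_values, 0.5F, 2.0F);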
/**
* Helper that generates 0 values for a given size
* and for a given array, meant for populating tensor buffers
* in end-to-end unit tests.
*/
void gen_float_array_zeros(int size, float arr[]) {
for (int i = 0; i < size; i++) {
arr[i] = 0;
}
}
/**
* Helper that generates an array copy for a given size and
* for a given array, meant for populating tensor buffers in
* end-to-end unit tests.
*/
void copy_to_array(int size, const float input[], float output[]) {
for (int i = 0; i < size; i++) {
output[i] = input[i];
}
}
/**
* Helper that generates an array with every other value equaling zero for a
* given size and for a given array, meant for populating tensor buffers in
* end-to-end unit tests.
*
 * Every other array index will be zeroed:
*
* Example:
* input: [1,2,3,4,5,6]
* output: [0,2,0,4,0,6]
*/
void fill_everyother_with_zero_float_array(int size, float arr[]) {
for (int i = 0; i < size; i++) {
if (i % 2 != 0) {
arr[i] = 0;
}
}
}
/**
* Helper that generates an array with all values equaling zero for a
* given size and for a given array, meant for populating tensor buffers in
* end-to-end unit tests.
*/
void fill_all_with_zero_float_array(int size, float arr[]) {
for (int i = 0; i < size; i++) {
arr[i] = 0;
}
}
/**
 * Helper that receives a function pointer to some function that estimates a
* value. For example, this could be the GeLu approximator function. This will
* calculate the expected results based on the input values passed.
*/
void generate_expected_output(float (*fn)(float), float input_values[],
int num_values, float expected_values[]) {
for (int i = 0; i < num_values; i++) {
expected_values[i] = fn(input_values[i]);
}
}
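// Usage sketch (illustrative): pairing the generators above with
// generate_expected_output() to build both sides of a comparison. tanhf()
// from <math.h> stands in for whatever reference function a real test uses.
//
//   float input_values[8], expected_values[8];
//   gen_random_float_array_pos_neg(8, input_values);
//   generate_expected_output(tanhf, input_values, 8, expected_values);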
int stdout_pipe[2];
int stderr_pipe[2];
int saved_stdout;
int saved_stderr;
void stdout_to_pipe() {
// save stream for display later
saved_stdout = dup(STDOUT_FILENO);
fflush(stdout);
// make a pipe
if (pipe(stdout_pipe) != 0) {
TEST_FAIL_MESSAGE("Can't open pipe()");
}
// redirect to pipe
dup2(stdout_pipe[1], STDOUT_FILENO);
close(stdout_pipe[1]);
return;
}
void stderr_to_pipe() {
// save stream for display later
saved_stderr = dup(STDERR_FILENO);
fflush(stderr);
// make a pipe
if (pipe(stderr_pipe) != 0) {
TEST_FAIL_MESSAGE("Can't open pipe()");
}
// redirect to pipe
dup2(stderr_pipe[1], STDERR_FILENO);
close(stderr_pipe[1]);
return;
}
void restore_stdout(char *buf, int buf_size) {
// the read() below blocks if nothing to read, so printf something
fprintf(stdout, " ");
fflush(stdout);
// read from pipe into buffer
read(stdout_pipe[0], buf, buf_size);
close(stdout_pipe[0]);
// restore stream to display
dup2(saved_stdout, STDOUT_FILENO);
}
void restore_stderr(char *buf, int buf_size) {
// the read() below blocks if nothing to read, so printf something
fprintf(stderr, "x");
fflush(stderr);
// read from pipe into buffer
read(stderr_pipe[0], buf, buf_size);
close(stderr_pipe[0]);
// restore stream to display
dup2(saved_stderr, STDERR_FILENO);
}
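// Usage sketch (illustrative): the pipe helpers let a test assert on what the
// code under test prints. The buffer size and expected substring are
// hypothetical.
//
//   char captured[1024] = {0};
//   stdout_to_pipe();
//   // ... code under test that writes to stdout ...
//   restore_stdout(captured, sizeof(captured));
//   TEST_ASSERT_NOT_NULL(strstr(captured, "expected message"));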
/**********************************************************
* Enhanced Unity Functions/Macros
**********************************************************/
#define NUM_ALL_PRE_TFRMD_TYPES 5
zdnn_data_types all_pre_tfrmd_types[NUM_ALL_PRE_TFRMD_TYPES] = {
INT8, INT32, FP16, FP32, BFLOAT};
#define NUM_DLFLOAT16_PRE_TFRMD_TYPES 3
zdnn_data_types dlfloat_pre_tfrmd_types[NUM_DLFLOAT16_PRE_TFRMD_TYPES] = {
FP16, FP32, BFLOAT};
#define NUM_QUANTIZED_PRE_TFRMD_TYPES 1
zdnn_data_types quantized_pre_tfrmd_types[NUM_QUANTIZED_PRE_TFRMD_TYPES] = {
INT8};
#define NUM_INDEX_PRE_TFRMD_TYPES 1
zdnn_data_types index_pre_tfrmd_types[NUM_INDEX_PRE_TFRMD_TYPES] = {INT32};
#define NUM_ALL_TFRMD_TYPES 4
zdnn_data_types all_tfrmd_types[NUM_ALL_TFRMD_TYPES] = {
ZDNN_DLFLOAT16, ZDNN_BINARY_FP32, ZDNN_BINARY_INT8, ZDNN_BINARY_INT32};
#define NUM_DLFLOAT16_TFRMD_TYPES 1
zdnn_data_types dlfloat_tfrmd_types[NUM_DLFLOAT16_TFRMD_TYPES] = {
ZDNN_DLFLOAT16};
#define NUM_QUANTIZED_TFRMD_TYPES 1
zdnn_data_types quantized_tfrmd_types[NUM_QUANTIZED_TFRMD_TYPES] = {
ZDNN_BINARY_INT8};
#define NUM_INDEX_TFRMD_TYPES 1
zdnn_data_types index_tfrmd_types[NUM_INDEX_TFRMD_TYPES] = {ZDNN_BINARY_INT32};
// indicates which data-type UnityDefaultTestRunWith*DataType() is currently
// testing
zdnn_data_types test_datatype = 128; // set initial value to something invalid
// Wrapper of Unity's UnityDefaultTestRun() that runs func() against all
// input data-types. Uses CamelCase intentionally to align with Unity.
// Function for All, DLFloat16, Quantized, and Index pre-transformed types.
void UnityDefaultTestRunWithAllPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_ALL_PRE_TFRMD_TYPES; i++) {
test_datatype = all_pre_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(all_pre_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
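// Usage sketch (illustrative): a test body typically branches on the global
// test_datatype set by these wrappers. "test_something" is a hypothetical
// Unity test function, registered via the RUN_TEST_ALL_* macros declared in
// testsupport.h.
//
//   void test_something() {
//     if (test_datatype == FP16) {
//       // FP16-specific inputs / expected values
//     }
//     // ...
//   }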
void UnityDefaultTestRunWithDLFloat16PreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_DLFLOAT16_PRE_TFRMD_TYPES; i++) {
test_datatype = dlfloat_pre_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(dlfloat_pre_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
void UnityDefaultTestRunWithQuantizedPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_QUANTIZED_PRE_TFRMD_TYPES; i++) {
test_datatype = quantized_pre_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(quantized_pre_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
void UnityDefaultTestRunWithIndexPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_INDEX_PRE_TFRMD_TYPES; i++) {
test_datatype = index_pre_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(index_pre_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
// UnityDefaultTestRunWithAllPreDataType() but with all transformed data-types
void UnityDefaultTestRunWithAllTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_ALL_TFRMD_TYPES; i++) {
test_datatype = all_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(all_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
// UnityDefaultTestRunWithDLFloat16PreDataType() but with transformed
// data-types
void UnityDefaultTestRunWithDLFloat16TfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_DLFLOAT16_TFRMD_TYPES; i++) {
test_datatype = dlfloat_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(dlfloat_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
// UnityDefaultTestRunWithQuantizedPreDataType() but with transformed data-types
// cppcheck-suppress unusedFunction
void UnityDefaultTestRunWithQuantizedTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_QUANTIZED_TFRMD_TYPES; i++) {
test_datatype = quantized_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(quantized_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
// UnityDefaultTestRunWithIndexPreDataType() but with transformed data-types
// cppcheck-suppress unusedFunction
void UnityDefaultTestRunWithIndexTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum) {
for (int i = 0; i < NUM_INDEX_TFRMD_TYPES; i++) {
test_datatype = index_tfrmd_types[i];
// FuncNameWithDataType is FuncName + " (data-type)" for printing
char FuncNameWithDataType[FUNCNAME_BANNER_LENGTH];
Unity.CurrentTestName = FuncNameWithDataType;
snprintf(FuncNameWithDataType, FUNCNAME_BANNER_LENGTH, "%s (%s)", FuncName,
get_data_type_str(index_tfrmd_types[i]));
UnityDefaultTestRun(Func, FuncNameWithDataType, FuncLineNum);
}
}
bool isTelumI() {
return (zdnn_is_nnpa_installed() && (zdnn_is_nnpa_parmblk_fmt_installed(
1, NNPA_PARMBLKFORMAT_1) == false));
} zDNN-1.1.2/tests/testsupport.h 0000664 0000000 0000000 00000032301 15000221702 0016237 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TESTS_TESTSUPPORT_H_
#define TESTS_TESTSUPPORT_H_
#include "convert.h"
#include "unity.h"
#include "zdnn.h"
#include "zdnn_private.h"
#include <float.h>
#include <stddef.h>
#define ENVVAR_TEST_RANDOM_SEED "ZDNN_TEST_RANDOM_SEED"
#define ENVVAR_TEST_ERROR_COUNT "ZDNN_TEST_ERROR_ELEMENT_COUNT"
#define ERROR_ELEMENT_COUNT_MAX_DEFAULT 10
#define AIU_METHOD_STR_LENGTH 32
extern float ZERO_ARRAY[1];
#define NO_CONCAT 0xFFFFFFFF
// "default" failure when non of the ZDNN_STATUS's si appropriate,
// likely due to something's wrong with the testcase itself
#define GENERAL_TESTCASE_FAILURE 0xDEADBEEF
void nhwc_2_nchw(void *nhwc_ptr, uint32_t n, uint32_t h, uint32_t w, uint32_t c,
int element_size, void *nchw_ptr);
size_t *alloc_offsets(zdnn_ztensor *ztensor);
size_t *alloc_rnn_offsets(const zdnn_ztensor *ztensor);
size_t *alloc_rnn_output_offsets(const zdnn_ztensor *ztensor);
void *alloc_and_convert_float_values(zdnn_data_types type, uint64_t num_values,
bool repeat_first_value,
const float *values);
zdnn_ztensor *alloc_ztensor_with_values(uint32_t *shape,
zdnn_data_layouts pre_tfrmd_layout,
zdnn_data_types type,
zdnn_concat_info info,
int repeat_first_value, ...);
zdnn_ztensor *alloc_output_ztensor(uint32_t *shape,
zdnn_data_layouts pre_tfrmd_layout,
zdnn_data_types type, zdnn_concat_info info);
void free_ztensor_buffers(uint32_t num_ztensors, ...);
// Struct for floating point value tolerance information.
typedef struct fp_tolerance {
uint32_t ulps; // unit in the last place
uint32_t epsilon_mult; // epsilon multiplier
} fp_tolerance;
extern fp_tolerance tol_bfloat, tol_fp16, tol_fp32;
void assert_ztensor_values(zdnn_ztensor *ztensor,
bool repeat_first_expected_value, void *values);
void assert_ztensor_values_adv(zdnn_ztensor *ztensor,
bool repeat_first_expected_value, void *values,
fp_tolerance tol);
unsigned char *create_and_fill_fp_data(zdnn_tensor_desc *desc);
unsigned char *create_and_fill_random_fp_data(zdnn_ztensor *ztensor);
int8_t *create_and_fill_random_int8_data(zdnn_ztensor *ztensor);
void gen_random_float_array(int size, float arr[]);
void gen_random_float_array_neg(int size, float arr[]);
void gen_random_float_array_pos_neg(int size, float arr[]);
void gen_random_float_array_range(int size, float arr[], float min, float max);
void gen_float_array_zeros(int size, float arr[]);
void copy_to_array(int size, const float input[], float output[]);
void fill_everyother_with_zero_float_array(int size, float arr[]);
void fill_all_with_zero_float_array(int size, float arr[]);
void generate_expected_output(float (*fn)(float), float input_values[],
int num_values, float expected_values[]);
#define SEQUENTIAL_FILL_INTERVAL 1.0F
#define SEQUENTIAL_FILL_MAX 1024.0F // sacrifice BFLOAT, 256 is too small
// "OK" tolerance values.
//
// As everything gets converted to DLFLOAT16 and back, some data types will fare
// better dealing with precision loss than others, thus the different values
// among the data types.
//
// Some ops may need higher/lower tolerance than these defaults.
#define MAX_ULPS_BFLOAT 8
#define MAX_ULPS_FP16 8
#define MAX_ULPS_FLOAT (16384 * 8)
#define MAX_ULPS_DLFLOAT16 8
#define MAX_EPSILON_MULT_BFLOAT 8
#define MAX_EPSILON_MULT_FP16 8
#define MAX_EPSILON_MULT_FLOAT (5120 * 8)
#define MAX_EPSILON_MULT_DLFLOAT16 8
// epsilon = 2 ^ (-num_mantissa_bits - 1)
#define EPSILON_BFLOAT 0.00390625F // 2 ^ -8
#define EPSILON_FP16 0.00048828125F // 2 ^ -11
#define EPSILON_FLOAT 0.000000059604644775390625F // 2 ^ -24, half of FLT_EPSILON
#define EPSILON_DLFLOAT16 0.0009765625F // 2 ^ -10
bool almost_equal_bfloat_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol);
bool almost_equal_fp16_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol);
bool almost_equal_float_adv(float actual, float expected, fp_tolerance tol);
bool almost_equal_dlf16_adv(uint16_t actual, uint16_t expected,
fp_tolerance tol);
bool almost_equal_bfloat(uint16_t actual, uint16_t expected);
bool almost_equal_fp16(uint16_t actual, uint16_t expected);
bool almost_equal_float(float actual, float expected);
bool almost_equal_dlf16(uint16_t actual, uint16_t expected);
// in some cases we can't use the single-precision float values as-is for
// calculating expected results. these macros convert a given single-precision
// value to its "representable-by-zAIU" value w.r.t. its pre-transformed data
// type
#define CLEANSE_BFLOAT(x) \
cnvt_1_dlf16_to_fp32( \
cnvt_1_fp32_to_dlf16(cnvt_1_bfloat_to_fp32((cnvt_1_fp32_to_bfloat(x)))))
#define CLEANSE_FP16(x) \
cnvt_1_dlf16_to_fp32( \
cnvt_1_fp32_to_dlf16(cnvt_1_fp16_to_fp32((cnvt_1_fp32_to_fp16(x)))))
#define CLEANSE_FP32(x) cnvt_1_dlf16_to_fp32(cnvt_1_fp32_to_dlf16(x))
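// Usage sketch (illustrative): expected values are "cleansed" through the
// same conversions the input data will undergo, so precision loss is baked
// into the comparison. The literal is an arbitrary example value.
//
//   float expected = CLEANSE_FP16(0.123F); // what 0.123F becomes after
//                                          // round-tripping through FP16
//                                          // and DLFLOAT16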
// Max/min absolute values for some of the test random float generators
#define LARGEST_RANDOM_FP 5.0F
#define SMALLEST_RANDOM_FP 0.00008F
// (raised from 0.00006F: dividing by the smaller value exceeded the FP16 upper limit in the div op)
// -----------------------------------------------------------------------------
// Max values by type (to create NNPA overflow)
// -----------------------------------------------------------------------------
#define MAX_FP32 FLT_MAX
#define MAX_FP16 ((float)65504) // 2^15 * (1 + 1023/1024)
#define MAX_BFLOAT FLT_MAX
#define MAX_DLF16 ((float)8581545984) // 2^32 * (1 + 511/512)
#define NUM_ALL_PRE_TFRMD_TYPES 5
#define NUM_DLFLOAT16_PRE_TFRMD_TYPES 3
#define NUM_QUANTIZED_PRE_TFRMD_TYPES 1
#define NUM_INDEX_PRE_TFRMD_TYPES 1
#define NUM_ALL_TFRMD_TYPES 4
#define NUM_DLFLOAT16_TFRMD_TYPES 1
#define NUM_QUANTIZED_TFRMD_TYPES 1
#define NUM_INDEX_TFRMD_TYPES 1
extern zdnn_data_types dlfloat_pre_tfrmd_types[NUM_DLFLOAT16_PRE_TFRMD_TYPES];
extern zdnn_data_types dlfloat_tfrmd_types[NUM_DLFLOAT16_TFRMD_TYPES];
#define NUM_PREV_LAYERS 2
#define NUM_BIASES_USAGES 2
#define NUM_NO_VCONCAT_INFOS 3
extern zdnn_concat_info prev_layers[NUM_PREV_LAYERS];
extern zdnn_concat_info biases_usages[NUM_BIASES_USAGES];
extern zdnn_concat_info no_vconcat_infos[NUM_NO_VCONCAT_INFOS];
void stdout_to_pipe();
void stderr_to_pipe();
void restore_stdout(char *buf, int buf_size);
void restore_stderr(char *buf, int buf_size);
bool isTelumI();
/* The following defines a macro to verify the hardware environment for our
* tests to successfully run in. Most tests require the proper HW environment
* to succeed. Even some of the others, like "..._fail" tests, are looking for
* a specific error, but can't rely on the root cause of that error without
* the proper HW environment. In the event the proper HW environment is not
* available, we will ignore or skip those tests.
*
*
 * Simply invoke it in the Unity setUp() hook or within specific tests.
*/
#define VERIFY_HW_ENV \
if (!zdnn_is_nnpa_installed()) \
TEST_IGNORE_MESSAGE("NNPA required for test.");
/* The following defines a macro to verify the hardware version for our tests
* to successfully run in. Some tests require the proper hardware version to
* succeed. Even some of the others, like "..._fail" tests, are looking for a
* specific error, but can't rely on the root cause of that error without the
* proper hardware version. In the event the proper hardware version is not
* available, we will ignore or skip those tests.
*
* We require both that NNPA hardware and NNPA_PARMBLKFORMAT_1 be available,
* otherwise we must skip tests.
*
 * Simply invoke it in the Unity setUp() hook or within specific tests.
*/
#define VERIFY_PARMBLKFORMAT_1 \
if (!is_query_parmblock_installed(NNPA_PARMBLKFORMAT_1)) \
TEST_IGNORE_MESSAGE("NNPA hardware version not available");
/**********************************************************
* Enhanced Unity Functions/Macros
**********************************************************/
// standard error message string buffer for all tests to send down to Unity
#define ERROR_MESSAGE_STR_LENGTH 512
extern char error_message[ERROR_MESSAGE_STR_LENGTH];
#define TEST_FAIL_MESSAGE_FORMATTED(f, ...) \
snprintf(error_message, ERROR_MESSAGE_STR_LENGTH, (f), __VA_ARGS__); \
TEST_FAIL_MESSAGE(error_message);
#define TEST_ASSERT_MESSAGE_FORMATTED(cond, f, ...) \
snprintf(error_message, ERROR_MESSAGE_STR_LENGTH, (f), __VA_ARGS__); \
TEST_ASSERT_MESSAGE((cond), error_message);
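// Usage sketch (illustrative): the formatted variants are used like printf.
// The condition and wording below are hypothetical.
//
//   TEST_ASSERT_MESSAGE_FORMATTED(status == ZDNN_OK,
//                                 "op failed with status %08x", status);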
#define FUNCNAME_BANNER_LENGTH 256
extern zdnn_data_types test_datatype;
void UnityDefaultTestRunWithAllPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithDLFloat16PreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithQuantizedPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithIndexPreDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithAllTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithDLFloat16TfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithQuantizedTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
void UnityDefaultTestRunWithIndexTfrmdDataType(UnityTestFunction Func,
const char *FuncName,
const int FuncLineNum);
// Macro to run test func() against all pre-transformed data-types
#define RUN_TEST_ALL_PRE_DATATYPES(func) \
UnityDefaultTestRunWithAllPreDataType(func, #func, __LINE__);
// Macro to run test func() against all dlfloat16 pre-transformed data-types
#define RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(func) \
UnityDefaultTestRunWithDLFloat16PreDataType(func, #func, __LINE__);
// Macro to run test func() against all quantized pre-transformed data-types
#define RUN_TEST_ALL_QUANTIZED_PRE_DATATYPES(func) \
UnityDefaultTestRunWithQuantizedPreDataType(func, #func, __LINE__);
// Macro to run test func() against all index pre-transformed data-types
#define RUN_TEST_ALL_INDEX_PRE_DATATYPES(func) \
UnityDefaultTestRunWithIndexPreDataType(func, #func, __LINE__);
// Macro to run test func() against all transformed data-types
#define RUN_TEST_ALL_TFRMD_DATATYPES(func) \
UnityDefaultTestRunWithAllTfrmdDataType(func, #func, __LINE__);
// Macro to run test func() against all dlfloat16 transformed data-types
#define RUN_TEST_ALL_DLFLOAT16_TFRMD_DATATYPES(func) \
UnityDefaultTestRunWithDLFloat16TfrmdDataType(func, #func, __LINE__);
// Macro to run test func() against all quantized transformed data-types
#define RUN_TEST_ALL_QUANTIZED_TFRMD_DATATYPES(func) \
UnityDefaultTestRunWithQuantizedTfrmdDataType(func, #func, __LINE__);
// Macro to run test func() against all index transformed data-types
#define RUN_TEST_ALL_INDEX_TFRMD_DATATYPES(func) \
UnityDefaultTestRunWithIndexTfrmdDataType(func, #func, __LINE__);
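// Usage sketch (illustrative): a test driver's main() pairs Unity's
// begin/end calls with these macros so each test function is repeated once
// per data type. "test_something" is a hypothetical test function.
//
//   int main() {
//     UNITY_BEGIN();
//     RUN_TEST_ALL_DLFLOAT16_PRE_DATATYPES(test_something);
//     return UNITY_END();
//   }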
#endif /* TESTS_TESTSUPPORT_H_ */
zDNN-1.1.2/tests/third_party/ 0000775 0000000 0000000 00000000000 15000221702 0016004 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/tests/third_party/Unity/ 0000775 0000000 0000000 00000000000 15000221702 0017114 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/zdnn/ 0000775 0000000 0000000 00000000000 15000221702 0013262 5 ustar 00root root 0000000 0000000 zDNN-1.1.2/zdnn/Makefile 0000664 0000000 0000000 00000004171 15000221702 0014725 0 ustar 00root root 0000000 0000000 # SPDX-License-Identifier: Apache-2.0
#
# Copyright IBM Corp. 2021
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
_dummy := $(shell mkdir -p obj)
OBJDIR := obj
include ../config.make
INCDIR := $(CFLAGS_NOSEARCH) -I ../zdnn -I .
_dummy2 := $(shell mkdir -p $(SODIR))
CFLAGS := $(INCDIR) $(CFLAGS)
CXXFLAGS := $(INCDIR) $(CXXFLAGS)
H_FILES := $(filter-out $(wildcard convert*.h), $(wildcard *.h))
INIT_SRCFILE := zdnn_init.c
SRCFILES := $(filter-out $(INIT_SRCFILE), $(wildcard *.c) $(wildcard *.cpp))
# Add the configure generated header
H_FILES += ../config.h
OBJFILES := $(patsubst %.c,$(OBJDIR)/%.o,$(patsubst %.cpp,$(OBJDIR)/%.o,$(SRCFILES)))
INIT_OBJFILE := $(patsubst %.c,$(OBJDIR)/%.o,$(INIT_SRCFILE))
all: $(SODIR)/$(LIBNAME).so $(SODIR)/$(LIBNAME_PRIVATE).so $(ZDNN_MAKE_TARGETS)
$(INIT_OBJFILE): $(INIT_SRCFILE)
$(CC) $(CFLAGS_INIT) $(CFLAGS_SHARED) -c $< -o $@
$(OBJDIR)/%.o: %.c
$(CC) $(CFLAGS) $(CFLAGS_SHARED) -c $< -o $@
$(OBJDIR)/%.o: %.cpp
$(CXX) $(CXXFLAGS) $(CFLAGS_SHARED) -c $< -o $@
include $(ZDNN_TMAKE_FILES)
$(SODIR)/$(LIBNAME).so: $(INIT_OBJFILE) $(OBJFILES) $(H_FILES)
$(LD) $(LDFLAGS_SHARED) -o $(SODIR)/$(LIBNAME).so $(INIT_OBJFILE) $(OBJFILES)
.PHONY: clean
clean:
$(RM) $(OBJDIR)/*.o $(OBJDIR)/*.lst $(OBJDIR)/*.d *~ core $(SODIR)/* \
*.so* ../zdnn/zdnn_private.map \
zdnn.i zdnn.dynsyms symcheck
.PHONY: install
install: all install_shared $(ZDNN_INSTALL_TARGETS)
.PHONY: install_shared
install_shared:
$(INSTALL) -d $(DESTDIR)$(libdir)
$(INSTALL) -d $(DESTDIR)$(includedir)
$(INSTALL) -t $(DESTDIR)$(libdir) $(SODIR)/$(LIBNAME).so
$(INSTALL_DATA) -t $(DESTDIR)$(includedir) zdnn.h
zDNN-1.1.2/zdnn/aiu_lstm_gru.c 0000664 0000000 0000000 00000111610 15000221702 0016120 0 ustar 00root root 0000000 0000000 // SPDX-License-Identifier: Apache-2.0
/*
* Copyright IBM Corp. 2021, 2024
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "zdnn.h"
#include "zdnn_private.h"
#include
#include
// External users only need to choose among FWD, BWD, and BIDIR. However, our
// directional_rnn() needs more detail: FWD vs BWD controls the order in which
// the timestep input is processed, while UNI vs BIDIR affects how we move
// over the hn_output.
typedef enum rnn_internal_direction {
UNI_FWD,
UNI_BWD,
BIDIR_FWD,
BIDIR_BWD,
} rnn_internal_direction;
// Named indices for numbers passed between the internal methods
typedef enum rnn_integer_indices {
TS,
BATCH,
HID_SIZE,
IN_PAD,
GATES,
SLICEABLE_INPUTS,
NUM_INTEGER_INDICES // Not an index, used to set size of the array later.
} rnn_integer_indices;
// Must match order in sliceable_inputs[]!
// Named indices for sliceable ztensors passed in by the user.
typedef enum rnn_user_zten_indices {
H0,
IN_WEIGHTS,
IN_BIAS,
HID_WEIGHTS,
HID_BIAS,
NUM_INPUTS_GRU, // Not a tensor, used to set size of the array later.
C0 = NUM_INPUTS_GRU,
NUM_INPUTS_LSTM // Not a tensor, used to set size of the array later.
} rnn_user_zten_indices;
// Named indices for ztensors created internally during a RNN call.
typedef enum rnn_internal_zten_indices {
FUSED,
TS_FUSED,
BIAS_ADD,
PREV_H_OUT,
TS_H_OUT,
PREV_C_OUT,
NUM_INTERNAL_ZTENS_GRU, // Not a ztensor, used to set size of the array later.
TS_C_OUT = NUM_INTERNAL_ZTENS_GRU,
NUM_INTERNAL_ZTENS_LSTM // Not a ztensor, used to set size of the array later.
} rnn_internal_zten_indices;
// Named indices for descriptors created internally during a RNN call. These
// descriptors do not affect the work_area size.
typedef enum rnn_internal_desc_indices {
RNN_IN_TSFUSED_BIASADD_DESC,
  NUM_INTERNAL_DESCS // Not a descriptor, used to set size of the array later.
} rnn_internal_desc_indices;
// Named indices for descriptors created internally during a RNN call. These
// descriptors influence the size of the work_area.
typedef enum work_area_desc_indices {
FUSED_WA_DESC,
MATMULBIASADD_OUT_WA_DESC,
TS_HC_OUT_WA_DESC,
NUM_WA_DESCS
} work_area_desc_indices;
// Struct of work_area descriptors and their calculated sizes. This way we can
// run the calculation before slicing to get the total work_area size and not
// need to recalculate the buffer_sizes after slicing the directional calls.
typedef struct work_area_descriptor {
zdnn_tensor_desc desc;
size_t buffer_size;
} work_area_descriptor;
// Helper method that determines the size of the work area for a single
// direction. We create some descriptors to determine that size and save them
// in work_area_descriptor structs so we don't need to regenerate them later.
static size_t setup_work_area_descs(uint8_t function_code, const uint32_t *nums,
work_area_descriptor *wa_descs) {
// work_area ------------------------------------
// | FUSED |
// +---------------------------------------------
// | BIAS_ADD |
// +---------------------------------------------
// | TS_C_OUT (LSTM) / TS_H_OUT (GRU) |
// | TS_C_OUT (LSTM) / TS_H_OUT (GRU)